1mod correlation;
14mod detection;
15mod filter;
16#[cfg(test)]
17mod tests;
18
19pub use detection::parse_field_spec;
20
21use std::collections::HashMap;
22use std::path::Path;
23
24use serde::Deserialize;
25use yaml_serde::Value;
26
27use crate::ast::*;
28use crate::error::{Result, SigmaParserError};
29
30pub fn parse_sigma_yaml(yaml: &str) -> Result<SigmaCollection> {
41 let mut collection = SigmaCollection::new();
42 let mut global: Option<Value> = None;
43 let mut previous: Option<Value> = None;
44
45 for doc in yaml_serde::Deserializer::from_str(yaml) {
46 let value: Value = match Value::deserialize(doc) {
47 Ok(v) => v,
48 Err(e) => {
49 collection.errors.push(format!("YAML parse error: {e}"));
50 break;
54 }
55 };
56
57 let Some(mapping) = value.as_mapping() else {
58 collection
59 .errors
60 .push("Document is not a YAML mapping".to_string());
61 continue;
62 };
63
64 if let Some(action_val) = mapping.get(Value::String("action".to_string())) {
66 let Some(action) = action_val.as_str() else {
67 collection.errors.push(format!(
68 "collection 'action' must be a string, got: {action_val:?}"
69 ));
70 continue;
71 };
72 match action {
73 "global" => {
74 let mut global_map = value.clone();
75 if let Some(m) = global_map.as_mapping_mut() {
76 m.remove(Value::String("action".to_string()));
77 }
78 global = Some(global_map);
79 continue;
80 }
81 "reset" => {
82 global = None;
83 continue;
84 }
85 "repeat" => {
86 if let Some(ref prev) = previous {
88 let mut repeat_val = value.clone();
89 if let Some(m) = repeat_val.as_mapping_mut() {
90 m.remove(Value::String("action".to_string()));
91 }
92 let merged_repeat = deep_merge(prev.clone(), repeat_val)?;
93
94 let final_val = if let Some(ref global_val) = global {
96 deep_merge(global_val.clone(), merged_repeat)?
97 } else {
98 merged_repeat
99 };
100
101 previous = Some(final_val.clone());
102
103 let mut doc_warnings: Vec<String> = Vec::new();
104 let parsed = parse_document(&final_val, &mut doc_warnings);
105 collection.errors.extend(doc_warnings);
106 match parsed {
107 Ok(doc) => match doc {
108 SigmaDocument::Rule(rule) => collection.rules.push(*rule),
109 SigmaDocument::Correlation(corr) => {
110 collection.correlations.push(corr)
111 }
112 SigmaDocument::Filter(filter) => collection.filters.push(filter),
113 },
114 Err(e) => {
115 collection.errors.push(e.to_string());
116 }
117 }
118 } else {
119 collection
120 .errors
121 .push("'action: repeat' without a previous document".to_string());
122 }
123 continue;
124 }
125 other => {
126 collection
127 .errors
128 .push(format!("Unknown collection action: {other}"));
129 continue;
130 }
131 }
132 }
133
134 let merged = if let Some(ref global_val) = global {
136 deep_merge(global_val.clone(), value)?
137 } else {
138 value
139 };
140
141 previous = Some(merged.clone());
143
144 let mut doc_warnings: Vec<String> = Vec::new();
146 let parsed = parse_document(&merged, &mut doc_warnings);
147 collection.errors.extend(doc_warnings);
148 match parsed {
149 Ok(doc) => match doc {
150 SigmaDocument::Rule(rule) => collection.rules.push(*rule),
151 SigmaDocument::Correlation(corr) => collection.correlations.push(corr),
152 SigmaDocument::Filter(filter) => collection.filters.push(filter),
153 },
154 Err(e) => {
155 collection.errors.push(e.to_string());
156 }
157 }
158 }
159
160 Ok(collection)
161}
162
163pub fn parse_sigma_file(path: &Path) -> Result<SigmaCollection> {
165 let content = std::fs::read_to_string(path)?;
166 parse_sigma_yaml(&content)
167}
168
169pub fn parse_sigma_directory(dir: &Path) -> Result<SigmaCollection> {
171 let mut collection = SigmaCollection::new();
172
173 fn walk(dir: &Path, collection: &mut SigmaCollection) -> Result<()> {
174 for entry in std::fs::read_dir(dir)? {
175 let entry = entry?;
176 let path = entry.path();
177 if path.is_dir() {
178 walk(&path, collection)?;
179 } else if matches!(
180 path.extension().and_then(|e| e.to_str()),
181 Some("yml" | "yaml")
182 ) {
183 match parse_sigma_file(&path) {
184 Ok(sub) => {
185 collection.rules.extend(sub.rules);
186 collection.correlations.extend(sub.correlations);
187 collection.filters.extend(sub.filters);
188 collection.errors.extend(sub.errors);
189 }
190 Err(e) => {
191 collection.errors.push(format!("{}: {e}", path.display()));
192 }
193 }
194 }
195 }
196 Ok(())
197 }
198
199 walk(dir, &mut collection)?;
200 Ok(collection)
201}
202
203fn parse_document(value: &Value, warnings: &mut Vec<String>) -> Result<SigmaDocument> {
211 let mapping = value
212 .as_mapping()
213 .ok_or_else(|| SigmaParserError::InvalidRule("Document is not a YAML mapping".into()))?;
214
215 if mapping.contains_key(Value::String("correlation".into())) {
216 correlation::parse_correlation_rule(value, warnings).map(SigmaDocument::Correlation)
217 } else if mapping.contains_key(Value::String("filter".into())) {
218 filter::parse_filter_rule(value, warnings).map(SigmaDocument::Filter)
219 } else {
220 detection::parse_detection_rule(value, warnings).map(|r| SigmaDocument::Rule(Box::new(r)))
221 }
222}
223
224pub(super) fn collect_custom_attributes(
239 m: &yaml_serde::Mapping,
240 standard_keys: &[&str],
241) -> HashMap<String, Value> {
242 let mut attrs: HashMap<String, Value> = m
243 .iter()
244 .filter_map(|(k, v)| {
245 let key = k.as_str()?;
246 if standard_keys.contains(&key) {
247 None
248 } else {
249 Some((key.to_string(), v.clone()))
250 }
251 })
252 .collect();
253
254 if let Some(Value::Mapping(explicit)) = m.get(val_key("custom_attributes")) {
255 for (k, v) in explicit {
256 if let Some(key) = k.as_str() {
257 attrs.insert(key.to_string(), v.clone());
258 }
259 }
260 }
261
262 attrs
263}
264
265pub(super) fn parse_logsource(value: &Value) -> Result<LogSource> {
266 let m = value
267 .as_mapping()
268 .ok_or_else(|| SigmaParserError::InvalidRule("logsource must be a mapping".into()))?;
269
270 let mut custom = HashMap::new();
271 let known_keys = ["category", "product", "service", "definition"];
272
273 for (k, v) in m {
274 let key_str = k.as_str().unwrap_or("");
275 if !known_keys.contains(&key_str) && !key_str.is_empty() {
276 match v.as_str() {
277 Some(val_str) => {
278 custom.insert(key_str.to_string(), val_str.to_string());
279 }
280 None => {
281 log::warn!(
282 "logsource custom field '{key_str}' has non-string value ({v:?}), skipping"
283 );
284 }
285 }
286 }
287 }
288
289 Ok(LogSource {
290 category: get_str(m, "category").map(|s| s.to_string()),
291 product: get_str(m, "product").map(|s| s.to_string()),
292 service: get_str(m, "service").map(|s| s.to_string()),
293 definition: get_str(m, "definition").map(|s| s.to_string()),
294 custom,
295 })
296}
297
298pub(super) fn parse_related(value: Option<&Value>, warnings: &mut Vec<String>) -> Vec<Related> {
303 let Some(seq_val) = value else {
304 return Vec::new();
305 };
306 let Some(seq) = seq_val.as_sequence() else {
307 warnings.push(format!(
308 "'related' must be a sequence of mappings, got: {seq_val:?}"
309 ));
310 return Vec::new();
311 };
312
313 seq.iter()
314 .enumerate()
315 .filter_map(|(i, item)| {
316 let Some(m) = item.as_mapping() else {
317 warnings.push(format!("related[{i}] is not a mapping: {item:?}"));
318 return None;
319 };
320 let id = match get_str(m, "id") {
321 Some(s) => s.to_string(),
322 None => {
323 warnings.push(format!("related[{i}] missing 'id'"));
324 return None;
325 }
326 };
327 let type_str = match get_str(m, "type") {
328 Some(s) => s,
329 None => {
330 warnings.push(format!("related[{i}] missing 'type'"));
331 return None;
332 }
333 };
334 let relation_type = match type_str.parse() {
335 Ok(t) => t,
336 Err(_) => {
337 warnings.push(format!(
338 "related[{i}] invalid type '{type_str}' (expected one of: \
339 derived, obsolete, merged, renamed, similar)"
340 ));
341 return None;
342 }
343 };
344 Some(Related { id, relation_type })
345 })
346 .collect()
347}
348
349pub(super) fn parse_enum_with_warn<T: std::str::FromStr>(
354 raw: Option<&str>,
355 field: &str,
356 warnings: &mut Vec<String>,
357) -> Option<T> {
358 let raw = raw?;
359 match raw.parse() {
360 Ok(v) => Some(v),
361 Err(_) => {
362 warnings.push(format!("invalid {field}: '{raw}'"));
363 None
364 }
365 }
366}
367
368pub(super) fn parse_sigma_version(
374 m: &yaml_serde::Mapping,
375 warnings: &mut Vec<String>,
376) -> Option<u32> {
377 let value = m.get(val_key("sigma-version"))?;
378 match crate::version::major_from_value(value) {
379 Some(major) => Some(major),
380 None => {
381 warnings.push(format!(
382 "invalid sigma-version: {value:?} (expected a major version integer like 3, \
383 or a release string like \"2.1.0\")"
384 ));
385 None
386 }
387 }
388}
389
390pub(super) fn val_key(s: &str) -> Value {
391 Value::String(s.to_string())
392}
393
394pub(super) fn get_str<'a>(m: &'a yaml_serde::Mapping, key: &str) -> Option<&'a str> {
395 m.get(val_key(key)).and_then(|v| v.as_str())
396}
397
398pub(super) fn get_str_list(m: &yaml_serde::Mapping, key: &str) -> Vec<String> {
399 match m.get(val_key(key)) {
400 Some(Value::String(s)) => vec![s.clone()],
401 Some(Value::Sequence(seq)) => seq
402 .iter()
403 .filter_map(|v| v.as_str().map(|s| s.to_string()))
404 .collect(),
405 _ => Vec::new(),
406 }
407}
408
409fn deep_merge(dest: Value, src: Value) -> crate::error::Result<Value> {
416 const MAX_DEPTH: usize = 64;
417
418 let (mut root_dest, root_src) = match (dest, src) {
419 (Value::Mapping(d), Value::Mapping(s)) => (d, s),
420 (_, src) => return Ok(src),
421 };
422
423 fn merge_level(
424 dest: &mut yaml_serde::Mapping,
425 src: yaml_serde::Mapping,
426 depth: usize,
427 ) -> crate::error::Result<()> {
428 if depth > MAX_DEPTH {
429 return Err(crate::error::SigmaParserError::MergeTooDeep(MAX_DEPTH));
430 }
431 for (k, v) in src {
432 if let Some(existing) = dest.remove(&k) {
433 match (existing, v) {
434 (Value::Mapping(mut d), Value::Mapping(s)) => {
435 merge_level(&mut d, s, depth + 1)?;
436 dest.insert(k, Value::Mapping(d));
437 }
438 (_, src_val) => {
439 dest.insert(k, src_val);
440 }
441 }
442 } else {
443 dest.insert(k, v);
444 }
445 }
446 Ok(())
447 }
448
449 merge_level(&mut root_dest, root_src, 0)?;
450 Ok(Value::Mapping(root_dest))
451}