1use std::collections::HashMap;
14use std::path::Path;
15
16use serde::Deserialize;
17use serde_yaml::Value;
18
19use crate::ast::*;
20use crate::condition::parse_condition;
21use crate::error::{Result, SigmaParserError};
22use crate::value::{SigmaValue, Timespan};
23
24pub fn parse_sigma_yaml(yaml: &str) -> Result<SigmaCollection> {
35 let mut collection = SigmaCollection::new();
36 let mut global: Option<Value> = None;
37 let mut previous: Option<Value> = None;
38
39 for doc in serde_yaml::Deserializer::from_str(yaml) {
40 let value: Value = match Value::deserialize(doc) {
41 Ok(v) => v,
42 Err(e) => {
43 collection.errors.push(format!("YAML parse error: {e}"));
44 break;
48 }
49 };
50
51 let Some(mapping) = value.as_mapping() else {
52 collection
53 .errors
54 .push("Document is not a YAML mapping".to_string());
55 continue;
56 };
57
58 if let Some(action_val) = mapping.get(Value::String("action".to_string())) {
60 let Some(action) = action_val.as_str() else {
61 collection.errors.push(format!(
62 "collection 'action' must be a string, got: {action_val:?}"
63 ));
64 continue;
65 };
66 match action {
67 "global" => {
68 let mut global_map = value.clone();
69 if let Some(m) = global_map.as_mapping_mut() {
70 m.remove(Value::String("action".to_string()));
71 }
72 global = Some(global_map);
73 continue;
74 }
75 "reset" => {
76 global = None;
77 continue;
78 }
79 "repeat" => {
80 if let Some(ref prev) = previous {
82 let mut repeat_val = value.clone();
83 if let Some(m) = repeat_val.as_mapping_mut() {
84 m.remove(Value::String("action".to_string()));
85 }
86 let merged_repeat = deep_merge(prev.clone(), repeat_val);
87
88 let final_val = if let Some(ref global_val) = global {
90 deep_merge(global_val.clone(), merged_repeat)
91 } else {
92 merged_repeat
93 };
94
95 previous = Some(final_val.clone());
96
97 match parse_document(&final_val) {
98 Ok(doc) => match doc {
99 SigmaDocument::Rule(rule) => collection.rules.push(*rule),
100 SigmaDocument::Correlation(corr) => {
101 collection.correlations.push(corr)
102 }
103 SigmaDocument::Filter(filter) => collection.filters.push(filter),
104 },
105 Err(e) => {
106 collection.errors.push(e.to_string());
107 }
108 }
109 } else {
110 collection
111 .errors
112 .push("'action: repeat' without a previous document".to_string());
113 }
114 continue;
115 }
116 other => {
117 collection
118 .errors
119 .push(format!("Unknown collection action: {other}"));
120 continue;
121 }
122 }
123 }
124
125 let merged = if let Some(ref global_val) = global {
127 deep_merge(global_val.clone(), value)
128 } else {
129 value
130 };
131
132 previous = Some(merged.clone());
134
135 match parse_document(&merged) {
137 Ok(doc) => match doc {
138 SigmaDocument::Rule(rule) => collection.rules.push(*rule),
139 SigmaDocument::Correlation(corr) => collection.correlations.push(corr),
140 SigmaDocument::Filter(filter) => collection.filters.push(filter),
141 },
142 Err(e) => {
143 collection.errors.push(e.to_string());
144 }
145 }
146 }
147
148 Ok(collection)
149}
150
151pub fn parse_sigma_file(path: &Path) -> Result<SigmaCollection> {
153 let content = std::fs::read_to_string(path)?;
154 parse_sigma_yaml(&content)
155}
156
157pub fn parse_sigma_directory(dir: &Path) -> Result<SigmaCollection> {
159 let mut collection = SigmaCollection::new();
160
161 fn walk(dir: &Path, collection: &mut SigmaCollection) -> Result<()> {
162 for entry in std::fs::read_dir(dir)? {
163 let entry = entry?;
164 let path = entry.path();
165 if path.is_dir() {
166 walk(&path, collection)?;
167 } else if matches!(
168 path.extension().and_then(|e| e.to_str()),
169 Some("yml" | "yaml")
170 ) {
171 match parse_sigma_file(&path) {
172 Ok(sub) => {
173 collection.rules.extend(sub.rules);
174 collection.correlations.extend(sub.correlations);
175 collection.filters.extend(sub.filters);
176 collection.errors.extend(sub.errors);
177 }
178 Err(e) => {
179 collection.errors.push(format!("{}: {e}", path.display()));
180 }
181 }
182 }
183 }
184 Ok(())
185 }
186
187 walk(dir, &mut collection)?;
188 Ok(collection)
189}
190
191fn parse_document(value: &Value) -> Result<SigmaDocument> {
199 let mapping = value
200 .as_mapping()
201 .ok_or_else(|| SigmaParserError::InvalidRule("Document is not a YAML mapping".into()))?;
202
203 if mapping.contains_key(Value::String("correlation".into())) {
204 parse_correlation_rule(value).map(SigmaDocument::Correlation)
205 } else if mapping.contains_key(Value::String("filter".into())) {
206 parse_filter_rule(value).map(SigmaDocument::Filter)
207 } else {
208 parse_detection_rule(value).map(|r| SigmaDocument::Rule(Box::new(r)))
209 }
210}
211
212fn parse_detection_rule(value: &Value) -> Result<SigmaRule> {
220 let m = value
221 .as_mapping()
222 .ok_or_else(|| SigmaParserError::InvalidRule("Expected a YAML mapping".into()))?;
223
224 let title = get_str(m, "title")
225 .ok_or_else(|| SigmaParserError::MissingField("title".into()))?
226 .to_string();
227
228 let detection_val = m
229 .get(val_key("detection"))
230 .ok_or_else(|| SigmaParserError::MissingField("detection".into()))?;
231 let detection = parse_detections(detection_val)?;
232
233 let logsource = m
234 .get(val_key("logsource"))
235 .map(parse_logsource)
236 .transpose()?
237 .unwrap_or_default();
238
239 let standard_rule_keys: &[&str] = &[
244 "title",
245 "id",
246 "related",
247 "name",
248 "taxonomy",
249 "status",
250 "description",
251 "license",
252 "author",
253 "references",
254 "date",
255 "modified",
256 "logsource",
257 "detection",
258 "fields",
259 "falsepositives",
260 "level",
261 "tags",
262 "scope",
263 "custom_attributes",
264 ];
265 let custom_attributes = collect_custom_attributes(m, standard_rule_keys);
266
267 Ok(SigmaRule {
268 title,
269 logsource,
270 detection,
271 id: get_str(m, "id").map(|s| s.to_string()),
272 name: get_str(m, "name").map(|s| s.to_string()),
273 related: parse_related(m.get(val_key("related"))),
274 taxonomy: get_str(m, "taxonomy").map(|s| s.to_string()),
275 status: get_str(m, "status").and_then(|s| s.parse().ok()),
276 description: get_str(m, "description").map(|s| s.to_string()),
277 license: get_str(m, "license").map(|s| s.to_string()),
278 author: get_str(m, "author").map(|s| s.to_string()),
279 references: get_str_list(m, "references"),
280 date: get_str(m, "date").map(|s| s.to_string()),
281 modified: get_str(m, "modified").map(|s| s.to_string()),
282 fields: get_str_list(m, "fields"),
283 falsepositives: get_str_list(m, "falsepositives"),
284 level: get_str(m, "level").and_then(|s| s.parse().ok()),
285 tags: get_str_list(m, "tags"),
286 scope: get_str_list(m, "scope"),
287 custom_attributes,
288 })
289}
290
291fn collect_custom_attributes(
302 m: &serde_yaml::Mapping,
303 standard_keys: &[&str],
304) -> HashMap<String, Value> {
305 let mut attrs: HashMap<String, Value> = m
306 .iter()
307 .filter_map(|(k, v)| {
308 let key = k.as_str()?;
309 if standard_keys.contains(&key) {
310 None
311 } else {
312 Some((key.to_string(), v.clone()))
313 }
314 })
315 .collect();
316
317 if let Some(Value::Mapping(explicit)) = m.get(val_key("custom_attributes")) {
318 for (k, v) in explicit {
319 if let Some(key) = k.as_str() {
320 attrs.insert(key.to_string(), v.clone());
321 }
322 }
323 }
324
325 attrs
326}
327
328fn parse_detections(value: &Value) -> Result<Detections> {
341 let m = value.as_mapping().ok_or_else(|| {
342 SigmaParserError::InvalidDetection("Detection section must be a mapping".into())
343 })?;
344
345 let condition_val = m
347 .get(val_key("condition"))
348 .ok_or_else(|| SigmaParserError::MissingField("condition".into()))?;
349
350 let condition_strings = match condition_val {
351 Value::String(s) => vec![s.clone()],
352 Value::Sequence(seq) => {
353 let mut strings = Vec::with_capacity(seq.len());
354 for v in seq {
355 match v.as_str() {
356 Some(s) => strings.push(s.to_string()),
357 None => {
358 return Err(SigmaParserError::InvalidDetection(format!(
359 "condition list items must be strings, got: {v:?}"
360 )));
361 }
362 }
363 }
364 strings
365 }
366 _ => {
367 return Err(SigmaParserError::InvalidDetection(
368 "condition must be a string or list of strings".into(),
369 ));
370 }
371 };
372
373 let conditions: Vec<ConditionExpr> = condition_strings
375 .iter()
376 .map(|s| parse_condition(s))
377 .collect::<Result<Vec<_>>>()?;
378
379 let timeframe = get_str(m, "timeframe").map(|s| s.to_string());
381
382 let mut named = HashMap::new();
384 for (key, val) in m {
385 let key_str = key.as_str().unwrap_or("");
386 if key_str == "condition" || key_str == "timeframe" {
387 continue;
388 }
389 named.insert(key_str.to_string(), parse_detection(val)?);
390 }
391
392 Ok(Detections {
393 named,
394 conditions,
395 condition_strings,
396 timeframe,
397 })
398}
399
400fn parse_detection(value: &Value) -> Result<Detection> {
409 match value {
410 Value::Mapping(m) => {
411 let items: Vec<DetectionItem> = m
413 .iter()
414 .map(|(k, v)| parse_detection_item(k.as_str().unwrap_or(""), v))
415 .collect::<Result<Vec<_>>>()?;
416 Ok(Detection::AllOf(items))
417 }
418 Value::Sequence(seq) => {
419 let all_plain = seq.iter().all(|v| !v.is_mapping() && !v.is_sequence());
421 if all_plain {
422 let values = seq.iter().map(SigmaValue::from_yaml).collect();
424 Ok(Detection::Keywords(values))
425 } else {
426 let subs: Vec<Detection> = seq
428 .iter()
429 .map(parse_detection)
430 .collect::<Result<Vec<_>>>()?;
431 Ok(Detection::AnyOf(subs))
432 }
433 }
434 _ => Ok(Detection::Keywords(vec![SigmaValue::from_yaml(value)])),
436 }
437}
438
439fn parse_detection_item(key: &str, value: &Value) -> Result<DetectionItem> {
448 let field = parse_field_spec(key)?;
449
450 let values = match value {
451 Value::Sequence(seq) => seq.iter().map(|v| to_sigma_value(v, &field)).collect(),
452 _ => vec![to_sigma_value(value, &field)],
453 };
454
455 Ok(DetectionItem { field, values })
456}
457
458fn to_sigma_value(v: &Value, field: &FieldSpec) -> SigmaValue {
462 if field.has_modifier(Modifier::Re)
463 && let Value::String(s) = v
464 {
465 return SigmaValue::from_raw_string(s);
466 }
467 SigmaValue::from_yaml(v)
468}
469
470pub fn parse_field_spec(key: &str) -> Result<FieldSpec> {
474 if key.is_empty() {
475 return Ok(FieldSpec::new(None, Vec::new()));
476 }
477
478 let parts: Vec<&str> = key.split('|').collect();
479 let field_name = parts[0];
480 let field = if field_name.is_empty() {
481 None
482 } else {
483 Some(field_name.to_string())
484 };
485
486 let mut modifiers = Vec::new();
487 for &mod_str in &parts[1..] {
488 let m = mod_str
489 .parse::<Modifier>()
490 .map_err(|_| SigmaParserError::UnknownModifier(mod_str.to_string()))?;
491 modifiers.push(m);
492 }
493
494 Ok(FieldSpec::new(field, modifiers))
495}
496
497fn parse_logsource(value: &Value) -> Result<LogSource> {
502 let m = value
503 .as_mapping()
504 .ok_or_else(|| SigmaParserError::InvalidRule("logsource must be a mapping".into()))?;
505
506 let mut custom = HashMap::new();
507 let known_keys = ["category", "product", "service", "definition"];
508
509 for (k, v) in m {
510 let key_str = k.as_str().unwrap_or("");
511 if !known_keys.contains(&key_str) && !key_str.is_empty() {
512 match v.as_str() {
513 Some(val_str) => {
514 custom.insert(key_str.to_string(), val_str.to_string());
515 }
516 None => {
517 log::warn!(
518 "logsource custom field '{key_str}' has non-string value ({v:?}), skipping"
519 );
520 }
521 }
522 }
523 }
524
525 Ok(LogSource {
526 category: get_str(m, "category").map(|s| s.to_string()),
527 product: get_str(m, "product").map(|s| s.to_string()),
528 service: get_str(m, "service").map(|s| s.to_string()),
529 definition: get_str(m, "definition").map(|s| s.to_string()),
530 custom,
531 })
532}
533
534fn parse_related(value: Option<&Value>) -> Vec<Related> {
539 let Some(Value::Sequence(seq)) = value else {
540 return Vec::new();
541 };
542
543 seq.iter()
544 .filter_map(|item| {
545 let m = item.as_mapping()?;
546 let id = get_str(m, "id")?.to_string();
547 let type_str = get_str(m, "type")?;
548 let relation_type = type_str.parse().ok()?;
549 Some(Related { id, relation_type })
550 })
551 .collect()
552}
553
554fn parse_correlation_rule(value: &Value) -> Result<CorrelationRule> {
562 let m = value
563 .as_mapping()
564 .ok_or_else(|| SigmaParserError::InvalidCorrelation("Expected a YAML mapping".into()))?;
565
566 let title = get_str(m, "title")
567 .ok_or_else(|| SigmaParserError::MissingField("title".into()))?
568 .to_string();
569
570 let corr_val = m
571 .get(val_key("correlation"))
572 .ok_or_else(|| SigmaParserError::MissingField("correlation".into()))?;
573 let corr = corr_val.as_mapping().ok_or_else(|| {
574 SigmaParserError::InvalidCorrelation("correlation must be a mapping".into())
575 })?;
576
577 let type_str = get_str(corr, "type")
579 .ok_or_else(|| SigmaParserError::InvalidCorrelation("Missing correlation type".into()))?;
580 let correlation_type: CorrelationType = type_str.parse().map_err(|_| {
581 SigmaParserError::InvalidCorrelation(format!("Unknown correlation type: {type_str}"))
582 })?;
583
584 let rules = match corr.get(val_key("rules")) {
586 Some(Value::Sequence(seq)) => seq
587 .iter()
588 .filter_map(|v| v.as_str().map(|s| s.to_string()))
589 .collect(),
590 Some(Value::String(s)) => vec![s.clone()],
591 _ => Vec::new(),
592 };
593
594 let group_by = match corr.get(val_key("group-by")) {
596 Some(Value::Sequence(seq)) => seq
597 .iter()
598 .filter_map(|v| v.as_str().map(|s| s.to_string()))
599 .collect(),
600 Some(Value::String(s)) => vec![s.clone()],
601 _ => Vec::new(),
602 };
603
604 let timespan_str = get_str(corr, "timeframe")
606 .or_else(|| get_str(corr, "timespan"))
607 .ok_or_else(|| SigmaParserError::InvalidCorrelation("Missing timeframe".into()))?;
608 let timespan = Timespan::parse(timespan_str)?;
609
610 let generate = m
613 .get(val_key("generate"))
614 .and_then(|v| v.as_bool())
615 .or_else(|| corr.get(val_key("generate")).and_then(|v| v.as_bool()))
616 .unwrap_or(false);
617
618 let condition = parse_correlation_condition(corr, correlation_type)?;
620
621 let aliases = parse_correlation_aliases(corr);
623
624 let standard_correlation_keys: &[&str] = &[
627 "author",
628 "correlation",
629 "custom_attributes",
630 "date",
631 "description",
632 "falsepositives",
633 "fields",
634 "generate",
635 "id",
636 "level",
637 "license",
638 "modified",
639 "name",
640 "references",
641 "related",
642 "scope",
643 "status",
644 "tags",
645 "taxonomy",
646 "title",
647 ];
648 let custom_attributes = collect_custom_attributes(m, standard_correlation_keys);
649
650 Ok(CorrelationRule {
651 title,
652 id: get_str(m, "id").map(|s| s.to_string()),
653 name: get_str(m, "name").map(|s| s.to_string()),
654 status: get_str(m, "status").and_then(|s| s.parse().ok()),
655 description: get_str(m, "description").map(|s| s.to_string()),
656 author: get_str(m, "author").map(|s| s.to_string()),
657 date: get_str(m, "date").map(|s| s.to_string()),
658 modified: get_str(m, "modified").map(|s| s.to_string()),
659 related: parse_related(m.get(val_key("related"))),
660 references: get_str_list(m, "references"),
661 taxonomy: get_str(m, "taxonomy").map(|s| s.to_string()),
662 license: get_str(m, "license").map(|s| s.to_string()),
663 tags: get_str_list(m, "tags"),
664 fields: get_str_list(m, "fields"),
665 falsepositives: get_str_list(m, "falsepositives"),
666 level: get_str(m, "level").and_then(|s| s.parse().ok()),
667 scope: get_str_list(m, "scope"),
668 correlation_type,
669 rules,
670 group_by,
671 timespan,
672 condition,
673 aliases,
674 generate,
675 custom_attributes,
676 })
677}
678
679fn parse_correlation_condition(
683 corr: &serde_yaml::Mapping,
684 correlation_type: CorrelationType,
685) -> Result<CorrelationCondition> {
686 let condition_val = corr.get(val_key("condition"));
687
688 match condition_val {
689 Some(Value::Mapping(cm)) => {
690 let operators = ["lt", "lte", "gt", "gte", "eq", "neq"];
692 let mut predicates = Vec::new();
693
694 for &op_str in &operators {
695 if let Some(val) = cm.get(val_key(op_str))
696 && let Ok(parsed_op) = op_str.parse::<ConditionOperator>()
697 {
698 let count = val
699 .as_u64()
700 .or_else(|| val.as_i64().map(|i| i as u64))
701 .ok_or_else(|| {
702 SigmaParserError::InvalidCorrelation(format!(
703 "correlation condition operator '{op_str}' requires a numeric value, got: {val:?}"
704 ))
705 })?;
706 predicates.push((parsed_op, count));
707 }
708 }
709
710 if predicates.is_empty() {
711 return Err(SigmaParserError::InvalidCorrelation(
712 "Correlation condition must have an operator (lt, lte, gt, gte, eq, neq)"
713 .into(),
714 ));
715 }
716
717 let field = match cm.get(val_key("field")) {
718 Some(Value::String(s)) => Some(vec![s.clone()]),
719 Some(Value::Sequence(seq)) => {
720 let fields: Vec<String> = seq
721 .iter()
722 .filter_map(|v| v.as_str().map(|s| s.to_string()))
723 .collect();
724 if fields.is_empty() {
725 None
726 } else {
727 Some(fields)
728 }
729 }
730 _ => None,
731 };
732
733 let percentile = cm.get(val_key("percentile")).and_then(|v| v.as_u64());
734
735 Ok(CorrelationCondition::Threshold {
736 predicates,
737 field,
738 percentile,
739 })
740 }
741 Some(Value::String(expr_str)) => {
742 let expr = parse_condition(expr_str)?;
744 Ok(CorrelationCondition::Extended(expr))
745 }
746 None => {
747 match correlation_type {
749 CorrelationType::Temporal | CorrelationType::TemporalOrdered => {
750 Ok(CorrelationCondition::Threshold {
751 predicates: vec![(ConditionOperator::Gte, 1)],
752 field: None,
753 percentile: None,
754 })
755 }
756 _ => Err(SigmaParserError::InvalidCorrelation(
757 "Non-temporal correlation rule requires a condition".into(),
758 )),
759 }
760 }
761 _ => Err(SigmaParserError::InvalidCorrelation(
762 "Correlation condition must be a mapping or string".into(),
763 )),
764 }
765}
766
767fn parse_correlation_aliases(corr: &serde_yaml::Mapping) -> Vec<FieldAlias> {
769 let Some(Value::Mapping(aliases_map)) = corr.get(val_key("aliases")) else {
770 return Vec::new();
771 };
772
773 aliases_map
774 .iter()
775 .filter_map(|(alias_key, alias_val)| {
776 let alias = alias_key.as_str()?.to_string();
777 let mapping_map = alias_val.as_mapping()?;
778 let mapping: HashMap<String, String> = mapping_map
779 .iter()
780 .filter_map(|(k, v)| Some((k.as_str()?.to_string(), v.as_str()?.to_string())))
781 .collect();
782 Some(FieldAlias { alias, mapping })
783 })
784 .collect()
785}
786
787fn parse_filter_rule(value: &Value) -> Result<FilterRule> {
793 let m = value
794 .as_mapping()
795 .ok_or_else(|| SigmaParserError::InvalidRule("Expected a YAML mapping".into()))?;
796
797 let title = get_str(m, "title")
798 .ok_or_else(|| SigmaParserError::MissingField("title".into()))?
799 .to_string();
800
801 let filter_val = m.get(val_key("filter"));
803 let filter_mapping = filter_val.and_then(|v| v.as_mapping());
804 let rules = match filter_mapping {
805 Some(fm) => match fm.get(val_key("rules")) {
806 Some(Value::String(s)) if s.eq_ignore_ascii_case("any") => FilterRuleTarget::Any,
807 Some(Value::String(s)) => FilterRuleTarget::Specific(vec![s.clone()]),
808 Some(Value::Sequence(seq)) => {
809 let list: Vec<String> = seq
810 .iter()
811 .filter_map(|v| v.as_str().map(|s| s.to_string()))
812 .collect();
813 if list.is_empty() {
814 FilterRuleTarget::Any
815 } else {
816 FilterRuleTarget::Specific(list)
817 }
818 }
819 _ => FilterRuleTarget::Any,
820 },
821 _ => FilterRuleTarget::Any,
822 };
823
824 let detection = if let Some(fm) = filter_mapping {
827 let mut det_map = serde_yaml::Mapping::new();
828 for (k, v) in fm.iter() {
829 let key_str = k.as_str().unwrap_or("");
830 if key_str != "rules" {
831 det_map.insert(k.clone(), v.clone());
832 }
833 }
834 if det_map.is_empty() {
835 return Err(SigmaParserError::MissingField("filter.selection".into()));
836 }
837 parse_detections(&Value::Mapping(det_map))?
838 } else {
839 return Err(SigmaParserError::MissingField("filter".into()));
840 };
841
842 let logsource = m
843 .get(val_key("logsource"))
844 .map(parse_logsource)
845 .transpose()?;
846
847 let standard_filter_keys: &[&str] = &[
848 "author",
849 "custom_attributes",
850 "date",
851 "description",
852 "falsepositives",
853 "fields",
854 "filter",
855 "id",
856 "level",
857 "license",
858 "logsource",
859 "modified",
860 "name",
861 "references",
862 "related",
863 "scope",
864 "status",
865 "tags",
866 "taxonomy",
867 "title",
868 ];
869 let custom_attributes = collect_custom_attributes(m, standard_filter_keys);
870
871 Ok(FilterRule {
872 title,
873 id: get_str(m, "id").map(|s| s.to_string()),
874 name: get_str(m, "name").map(|s| s.to_string()),
875 taxonomy: get_str(m, "taxonomy").map(|s| s.to_string()),
876 status: get_str(m, "status").and_then(|s| s.parse().ok()),
877 description: get_str(m, "description").map(|s| s.to_string()),
878 author: get_str(m, "author").map(|s| s.to_string()),
879 date: get_str(m, "date").map(|s| s.to_string()),
880 modified: get_str(m, "modified").map(|s| s.to_string()),
881 related: parse_related(m.get(val_key("related"))),
882 license: get_str(m, "license").map(|s| s.to_string()),
883 references: get_str_list(m, "references"),
884 tags: get_str_list(m, "tags"),
885 fields: get_str_list(m, "fields"),
886 falsepositives: get_str_list(m, "falsepositives"),
887 level: get_str(m, "level").and_then(|s| s.parse().ok()),
888 scope: get_str_list(m, "scope"),
889 logsource,
890 rules,
891 detection,
892 custom_attributes,
893 })
894}
895
896fn val_key(s: &str) -> Value {
901 Value::String(s.to_string())
902}
903
904fn get_str<'a>(m: &'a serde_yaml::Mapping, key: &str) -> Option<&'a str> {
905 m.get(val_key(key)).and_then(|v| v.as_str())
906}
907
908fn get_str_list(m: &serde_yaml::Mapping, key: &str) -> Vec<String> {
909 match m.get(val_key(key)) {
910 Some(Value::String(s)) => vec![s.clone()],
911 Some(Value::Sequence(seq)) => seq
912 .iter()
913 .filter_map(|v| v.as_str().map(|s| s.to_string()))
914 .collect(),
915 _ => Vec::new(),
916 }
917}
918
919fn deep_merge(dest: Value, src: Value) -> Value {
923 match (dest, src) {
924 (Value::Mapping(mut dest_map), Value::Mapping(src_map)) => {
925 for (k, v) in src_map {
926 let merged = if let Some(existing) = dest_map.remove(&k) {
927 deep_merge(existing, v)
928 } else {
929 v
930 };
931 dest_map.insert(k, merged);
932 }
933 Value::Mapping(dest_map)
934 }
935 (_, src) => src, }
937}
938
939#[cfg(test)]
944mod tests {
945 use super::*;
946
947 #[test]
948 fn test_parse_simple_rule() {
949 let yaml = r#"
950title: Test Rule
951id: 12345678-1234-1234-1234-123456789012
952status: test
953logsource:
954 product: windows
955 category: process_creation
956detection:
957 selection:
958 CommandLine|contains: 'whoami'
959 condition: selection
960level: medium
961"#;
962 let collection = parse_sigma_yaml(yaml).unwrap();
963 assert_eq!(collection.rules.len(), 1);
964
965 let rule = &collection.rules[0];
966 assert_eq!(rule.title, "Test Rule");
967 assert_eq!(rule.logsource.product, Some("windows".to_string()));
968 assert_eq!(
969 rule.logsource.category,
970 Some("process_creation".to_string())
971 );
972 assert_eq!(rule.level, Some(Level::Medium));
973 assert_eq!(rule.detection.conditions.len(), 1);
974 assert_eq!(
975 rule.detection.conditions[0],
976 ConditionExpr::Identifier("selection".to_string())
977 );
978 assert!(rule.detection.named.contains_key("selection"));
979 }
980
981 #[test]
982 fn test_parse_field_modifiers() {
983 let spec = parse_field_spec("TargetObject|endswith").unwrap();
984 assert_eq!(spec.name, Some("TargetObject".to_string()));
985 assert_eq!(spec.modifiers, vec![Modifier::EndsWith]);
986
987 let spec = parse_field_spec("Destination|contains|all").unwrap();
988 assert_eq!(spec.name, Some("Destination".to_string()));
989 assert_eq!(spec.modifiers, vec![Modifier::Contains, Modifier::All]);
990
991 let spec = parse_field_spec("Details|re").unwrap();
992 assert_eq!(spec.name, Some("Details".to_string()));
993 assert_eq!(spec.modifiers, vec![Modifier::Re]);
994
995 let spec = parse_field_spec("Destination|base64offset|contains").unwrap();
996 assert_eq!(
997 spec.modifiers,
998 vec![Modifier::Base64Offset, Modifier::Contains]
999 );
1000 }
1001
1002 #[test]
1003 fn test_parse_complex_condition() {
1004 let yaml = r#"
1005title: Complex Rule
1006logsource:
1007 product: windows
1008 category: registry_set
1009detection:
1010 selection_main:
1011 TargetObject|contains: '\SOFTWARE\Microsoft\Windows Defender\'
1012 selection_dword_1:
1013 Details: 'DWORD (0x00000001)'
1014 filter_optional_symantec:
1015 Image|startswith: 'C:\Program Files\Symantec\'
1016 condition: selection_main and 1 of selection_dword_* and not 1 of filter_optional_*
1017"#;
1018 let collection = parse_sigma_yaml(yaml).unwrap();
1019 assert_eq!(collection.rules.len(), 1);
1020
1021 let rule = &collection.rules[0];
1022 assert_eq!(rule.detection.named.len(), 3);
1023
1024 let cond = &rule.detection.conditions[0];
1025 match cond {
1026 ConditionExpr::And(args) => {
1027 assert_eq!(args.len(), 3);
1028 }
1029 _ => panic!("Expected AND condition"),
1030 }
1031 }
1032
1033 #[test]
1034 fn test_parse_condition_list() {
1035 let yaml = r#"
1036title: Multi-condition Rule
1037logsource:
1038 category: test
1039detection:
1040 selection1:
1041 username: user1
1042 selection2:
1043 username: user2
1044 condition:
1045 - selection1
1046 - selection2
1047"#;
1048 let collection = parse_sigma_yaml(yaml).unwrap();
1049 let rule = &collection.rules[0];
1050 assert_eq!(rule.detection.conditions.len(), 2);
1051 }
1052
1053 #[test]
1054 fn test_parse_correlation_rule() {
1055 let yaml = r#"
1056title: Base Rule
1057id: f305fd62-beca-47da-ad95-7690a0620084
1058logsource:
1059 product: aws
1060 service: cloudtrail
1061detection:
1062 selection:
1063 eventSource: "s3.amazonaws.com"
1064 condition: selection
1065level: low
1066---
1067title: Multiple AWS bucket enumerations
1068id: be246094-01d3-4bba-88de-69e582eba0cc
1069status: experimental
1070correlation:
1071 type: event_count
1072 rules:
1073 - f305fd62-beca-47da-ad95-7690a0620084
1074 group-by:
1075 - userIdentity.arn
1076 timespan: 1h
1077 condition:
1078 gte: 100
1079level: high
1080"#;
1081 let collection = parse_sigma_yaml(yaml).unwrap();
1082 assert_eq!(collection.rules.len(), 1);
1083 assert_eq!(collection.correlations.len(), 1);
1084
1085 let corr = &collection.correlations[0];
1086 assert_eq!(corr.correlation_type, CorrelationType::EventCount);
1087 assert_eq!(corr.timespan.seconds, 3600);
1088 assert_eq!(corr.group_by, vec!["userIdentity.arn"]);
1089
1090 match &corr.condition {
1091 CorrelationCondition::Threshold { predicates, .. } => {
1092 assert_eq!(predicates.len(), 1);
1093 assert_eq!(predicates[0].0, ConditionOperator::Gte);
1094 assert_eq!(predicates[0].1, 100);
1095 }
1096 _ => panic!("Expected threshold condition"),
1097 }
1098 }
1099
1100 #[test]
1101 fn test_parse_correlation_rule_custom_attributes() {
1102 let yaml = r#"
1103title: Login
1104id: login-rule
1105logsource:
1106 category: auth
1107detection:
1108 selection:
1109 EventType: login
1110 condition: selection
1111---
1112title: Many Logins
1113custom_attributes:
1114 rsigma.correlation_event_mode: refs
1115 rsigma.suppress: 5m
1116 rsigma.action: reset
1117 rsigma.max_correlation_events: "25"
1118correlation:
1119 type: event_count
1120 rules:
1121 - login-rule
1122 group-by:
1123 - User
1124 timespan: 60s
1125 condition:
1126 gte: 3
1127level: high
1128"#;
1129 let collection = parse_sigma_yaml(yaml).unwrap();
1130 assert_eq!(collection.correlations.len(), 1);
1131
1132 let corr = &collection.correlations[0];
1133 assert_eq!(
1134 corr.custom_attributes
1135 .get("rsigma.correlation_event_mode")
1136 .and_then(Value::as_str),
1137 Some("refs")
1138 );
1139 assert_eq!(
1140 corr.custom_attributes
1141 .get("rsigma.suppress")
1142 .and_then(Value::as_str),
1143 Some("5m")
1144 );
1145 assert_eq!(
1146 corr.custom_attributes
1147 .get("rsigma.action")
1148 .and_then(Value::as_str),
1149 Some("reset")
1150 );
1151 assert_eq!(
1152 corr.custom_attributes
1153 .get("rsigma.max_correlation_events")
1154 .and_then(Value::as_str),
1155 Some("25")
1156 );
1157 }
1158
1159 #[test]
1160 fn test_parse_correlation_rule_no_custom_attributes() {
1161 let yaml = r#"
1162title: Login
1163id: login-rule
1164logsource:
1165 category: auth
1166detection:
1167 selection:
1168 EventType: login
1169 condition: selection
1170---
1171title: Many Logins
1172correlation:
1173 type: event_count
1174 rules:
1175 - login-rule
1176 group-by:
1177 - User
1178 timespan: 60s
1179 condition:
1180 gte: 3
1181level: high
1182"#;
1183 let collection = parse_sigma_yaml(yaml).unwrap();
1184 let corr = &collection.correlations[0];
1185 assert!(corr.custom_attributes.is_empty());
1186 }
1187
1188 #[test]
1189 fn test_parse_detection_or_linked() {
1190 let yaml = r#"
1191title: OR-linked detections
1192logsource:
1193 product: windows
1194 category: wmi_event
1195detection:
1196 selection:
1197 - Destination|contains|all:
1198 - 'new-object'
1199 - 'net.webclient'
1200 - Destination|contains:
1201 - 'WScript.Shell'
1202 condition: selection
1203level: high
1204"#;
1205 let collection = parse_sigma_yaml(yaml).unwrap();
1206 let rule = &collection.rules[0];
1207 let detection = &rule.detection.named["selection"];
1208
1209 match detection {
1210 Detection::AnyOf(subs) => {
1211 assert_eq!(subs.len(), 2);
1212 }
1213 _ => panic!("Expected AnyOf detection, got {detection:?}"),
1214 }
1215 }
1216
1217 #[test]
1218 fn test_parse_global_action() {
1219 let yaml = r#"
1220action: global
1221title: Global Rule
1222logsource:
1223 product: windows
1224---
1225detection:
1226 selection:
1227 EventID: 1
1228 condition: selection
1229level: high
1230---
1231detection:
1232 selection:
1233 EventID: 2
1234 condition: selection
1235level: medium
1236"#;
1237 let collection = parse_sigma_yaml(yaml).unwrap();
1238 assert_eq!(collection.rules.len(), 2);
1239 assert_eq!(collection.rules[0].title, "Global Rule");
1240 assert_eq!(collection.rules[1].title, "Global Rule");
1241 }
1242
1243 #[test]
1244 fn test_unknown_modifier_error() {
1245 let result = parse_field_spec("field|foobar");
1246 assert!(result.is_err());
1247 }
1248
1249 #[test]
1252 fn test_parse_contains_re_combination() {
1253 let spec = parse_field_spec("CommandLine|contains|re").unwrap();
1254 assert_eq!(spec.modifiers, vec![Modifier::Contains, Modifier::Re]);
1255 }
1256
1257 #[test]
1258 fn test_parse_duplicate_modifiers() {
1259 let spec = parse_field_spec("Field|contains|contains").unwrap();
1260 assert_eq!(spec.modifiers, vec![Modifier::Contains, Modifier::Contains]);
1261 }
1262
1263 #[test]
1264 fn test_parse_conflicting_string_match_modifiers() {
1265 let spec = parse_field_spec("Field|contains|startswith").unwrap();
1266 assert_eq!(
1267 spec.modifiers,
1268 vec![Modifier::Contains, Modifier::StartsWith]
1269 );
1270 }
1271
1272 #[test]
1273 fn test_parse_conflicting_endswith_startswith() {
1274 let spec = parse_field_spec("Field|endswith|startswith").unwrap();
1275 assert_eq!(
1276 spec.modifiers,
1277 vec![Modifier::EndsWith, Modifier::StartsWith]
1278 );
1279 }
1280
1281 #[test]
1282 fn test_parse_re_with_contains() {
1283 let spec = parse_field_spec("Field|re|contains").unwrap();
1284 assert_eq!(spec.modifiers, vec![Modifier::Re, Modifier::Contains]);
1285 }
1286
1287 #[test]
1288 fn test_parse_cidr_with_contains() {
1289 let spec = parse_field_spec("Field|cidr|contains").unwrap();
1290 assert_eq!(spec.modifiers, vec![Modifier::Cidr, Modifier::Contains]);
1291 }
1292
1293 #[test]
1294 fn test_parse_multiple_encoding_modifiers() {
1295 let spec = parse_field_spec("Field|base64|wide|base64offset").unwrap();
1296 assert_eq!(
1297 spec.modifiers,
1298 vec![Modifier::Base64, Modifier::Wide, Modifier::Base64Offset]
1299 );
1300 }
1301
1302 #[test]
1303 fn test_parse_numeric_with_string_modifiers() {
1304 let spec = parse_field_spec("Field|gt|contains").unwrap();
1305 assert_eq!(spec.modifiers, vec![Modifier::Gt, Modifier::Contains]);
1306 }
1307
/// `exists` followed by `contains` is accepted by the field-spec parser.
#[test]
fn test_parse_exists_with_other_modifiers() {
    let expected = vec![Modifier::Exists, Modifier::Contains];
    let parsed = parse_field_spec("Field|exists|contains").unwrap();
    assert_eq!(parsed.modifiers, expected);
}
1313
/// `re` followed by the flags `i`, `m` and `s` maps to `Re`, `IgnoreCase`,
/// `Multiline` and `DotAll` respectively.
#[test]
fn test_parse_re_with_regex_flags() {
    let expected = vec![
        Modifier::Re,
        Modifier::IgnoreCase,
        Modifier::Multiline,
        Modifier::DotAll,
    ];
    let parsed = parse_field_spec("Field|re|i|m|s").unwrap();
    assert_eq!(parsed.modifiers, expected);
}
1327
/// The flags `i` and `m` are parsed even when no `re` modifier precedes them.
#[test]
fn test_parse_regex_flags_without_re() {
    let expected = vec![Modifier::IgnoreCase, Modifier::Multiline];
    let parsed = parse_field_spec("Field|i|m").unwrap();
    assert_eq!(parsed.modifiers, expected);
}
1336
/// A detection whose value is a plain list of strings parses as
/// `Detection::Keywords` with one entry per list item.
#[test]
fn test_keyword_detection() {
    let yaml = r#"
title: Keyword Rule
logsource:
    category: test
detection:
    keywords:
        - 'suspicious'
        - 'malware'
    condition: keywords
level: high
"#;
    let collection = parse_sigma_yaml(yaml).unwrap();
    // Document-level failures are collected in `errors` instead of being
    // returned, so make sure a rule exists before indexing; otherwise the
    // test dies with an uninformative out-of-bounds panic.
    assert_eq!(collection.rules.len(), 1, "errors: {:?}", collection.errors);
    let rule = &collection.rules[0];
    let det = &rule.detection.named["keywords"];
    match det {
        Detection::Keywords(vals) => assert_eq!(vals.len(), 2),
        _ => panic!("Expected Keywords detection"),
    }
}
1358
/// An `action: repeat` document yields a second rule that takes its own title
/// while inheriting logsource and level from the preceding rule.
#[test]
fn test_action_repeat() {
    let input = r#"
title: Base Rule
logsource:
    product: windows
    category: process_creation
detection:
    selection:
        CommandLine|contains: 'whoami'
    condition: selection
level: medium
---
action: repeat
title: Repeated Rule
detection:
    selection:
        CommandLine|contains: 'ipconfig'
    condition: selection
"#;
    let parsed = parse_sigma_yaml(input).unwrap();
    assert_eq!(parsed.rules.len(), 2);
    assert!(parsed.errors.is_empty(), "errors: {:?}", parsed.errors);

    // The base rule is parsed exactly as written.
    let base = &parsed.rules[0];
    assert_eq!(base.title, "Base Rule");
    assert_eq!(base.level, Some(crate::ast::Level::Medium));
    assert_eq!(base.logsource.product.as_deref(), Some("windows"));

    // The repeated rule overrides the title and inherits everything it omits.
    let repeated = &parsed.rules[1];
    assert_eq!(repeated.title, "Repeated Rule");
    assert_eq!(repeated.logsource.product.as_deref(), Some("windows"));
    assert_eq!(
        repeated.logsource.category.as_deref(),
        Some("process_creation")
    );
    assert_eq!(repeated.level, Some(crate::ast::Level::Medium));
}
1408
/// `action: repeat` with no earlier document produces no rule and exactly one
/// collected error mentioning the missing previous document.
#[test]
fn test_action_repeat_no_previous() {
    let input = r#"
action: repeat
title: Orphan Rule
detection:
    selection:
        CommandLine|contains: 'whoami'
    condition: selection
"#;
    let parsed = parse_sigma_yaml(input).unwrap();
    assert_eq!(parsed.rules.len(), 0);
    assert_eq!(parsed.errors.len(), 1);

    let only_error = &parsed.errors[0];
    assert!(only_error.contains("without a previous document"));
}
1424
/// Two consecutive `action: repeat` documents each yield a rule; all three
/// rules share the base rule's logsource and level.
#[test]
fn test_action_repeat_multiple_repeats() {
    let input = r#"
title: Base
logsource:
    product: windows
    category: process_creation
level: high
detection:
    selection:
        CommandLine|contains: 'cmd'
    condition: selection
---
action: repeat
title: Repeat One
detection:
    selection:
        CommandLine|contains: 'powershell'
    condition: selection
---
action: repeat
title: Repeat Two
detection:
    selection:
        CommandLine|contains: 'wscript'
    condition: selection
"#;
    let parsed = parse_sigma_yaml(input).unwrap();
    assert_eq!(parsed.rules.len(), 3);
    assert!(parsed.errors.is_empty());

    let rules = &parsed.rules;
    assert_eq!(rules[0].title, "Base");
    assert_eq!(rules[1].title, "Repeat One");
    assert_eq!(rules[2].title, "Repeat Two");

    // Fields omitted by the repeats come from the base rule.
    rules.iter().for_each(|rule| {
        assert_eq!(rule.logsource.product.as_deref(), Some("windows"));
        assert_eq!(
            rule.logsource.category.as_deref(),
            Some("process_creation")
        );
        assert_eq!(rule.level, Some(crate::ast::Level::High));
    });
}
1470
/// A repeat inherits from the immediately preceding (already merged) document:
/// the third rule picks up `level: medium` set by the second, not `low` from
/// the first.
#[test]
fn test_action_repeat_chained_inherits_from_last() {
    let input = r#"
title: First
logsource:
    product: linux
level: low
detection:
    selection:
        command|contains: 'ls'
    condition: selection
---
action: repeat
title: Second
level: medium
detection:
    selection:
        command|contains: 'cat'
    condition: selection
---
action: repeat
title: Third
detection:
    selection:
        command|contains: 'grep'
    condition: selection
"#;
    let parsed = parse_sigma_yaml(input).unwrap();
    assert_eq!(parsed.rules.len(), 3);

    let expected_levels = [
        crate::ast::Level::Low,
        crate::ast::Level::Medium,
        crate::ast::Level::Medium,
    ];
    for (rule, wanted) in parsed.rules.iter().zip(expected_levels) {
        assert_eq!(rule.level, Some(wanted));
    }

    // The logsource is never overridden, so every rule carries the original one.
    for rule in parsed.rules.iter() {
        assert_eq!(rule.logsource.product.as_deref(), Some("linux"));
    }
}
1513
/// With an `action: global` template in effect, both a plain rule and a
/// following `action: repeat` rule receive the template's logsource and level.
#[test]
fn test_action_repeat_with_global_template() {
    let input = r#"
action: global
logsource:
    product: windows
level: medium
---
title: Rule A
detection:
    selection:
        EventID: 1
    condition: selection
---
action: repeat
title: Rule B
detection:
    selection:
        EventID: 2
    condition: selection
"#;
    let parsed = parse_sigma_yaml(input).unwrap();
    assert_eq!(parsed.rules.len(), 2);
    assert!(parsed.errors.is_empty());

    let (rule_a, rule_b) = (&parsed.rules[0], &parsed.rules[1]);
    assert_eq!(rule_a.title, "Rule A");
    assert_eq!(rule_b.title, "Rule B");

    parsed.rules.iter().for_each(|rule| {
        assert_eq!(rule.logsource.product.as_deref(), Some("windows"));
        assert_eq!(rule.level, Some(crate::ast::Level::Medium));
    });
}
1548
/// A correlation condition with two comparison keys (`gt` and `lte`) parses
/// into a threshold condition carrying both predicates and no field.
#[test]
fn test_correlation_condition_range() {
    let input = r#"
title: Base Rule
name: base_rule
logsource:
    product: windows
detection:
    selection:
        EventID: 1
    condition: selection
level: low
---
title: Range Correlation
name: range_test
correlation:
    type: event_count
    rules:
        - base_rule
    group-by:
        - User
    timespan: 1h
    condition:
        gt: 10
        lte: 100
"#;
    let parsed = parse_sigma_yaml(input).unwrap();
    assert_eq!(parsed.correlations.len(), 1);

    let CorrelationCondition::Threshold {
        predicates, field, ..
    } = &parsed.correlations[0].condition
    else {
        panic!("Expected threshold condition");
    };

    assert_eq!(predicates.len(), 2);
    // Look each predicate up by content rather than by position.
    let has_gt = predicates
        .iter()
        .any(|pred| pred.0 == ConditionOperator::Gt && pred.1 == 10);
    let has_lte = predicates
        .iter()
        .any(|pred| pred.0 == ConditionOperator::Lte && pred.1 == 100);
    assert!(has_gt, "Expected gt: 10 predicate");
    assert!(has_lte, "Expected lte: 100 predicate");
    assert!(field.is_none());
}
1598
/// A correlation condition with two comparison keys plus `field` parses into a
/// threshold condition with two predicates and the field captured as a
/// single-element list.
#[test]
fn test_correlation_condition_range_with_field() {
    let yaml = r#"
title: Base Rule
name: base_rule
logsource:
    product: windows
detection:
    selection:
        EventID: 1
    condition: selection
level: low
---
title: Range With Field
name: range_with_field
correlation:
    type: value_count
    rules:
        - base_rule
    group-by:
        - User
    timespan: 1h
    condition:
        gte: 5
        lt: 50
        field: TargetUser
"#;
    let collection = parse_sigma_yaml(yaml).unwrap();
    // Document-level failures land in `errors`, so confirm the correlation was
    // actually produced before indexing (mirrors the sibling range test) and
    // surface the collected errors if it was not.
    assert_eq!(
        collection.correlations.len(),
        1,
        "errors: {:?}",
        collection.errors
    );
    let corr = &collection.correlations[0];

    match &corr.condition {
        CorrelationCondition::Threshold {
            predicates, field, ..
        } => {
            assert_eq!(predicates.len(), 2);
            assert_eq!(
                field.as_deref(),
                Some(["TargetUser".to_string()].as_slice())
            );
        }
        _ => panic!("Expected threshold condition"),
    }
}
1642
/// The `neq` modifier on a detection field is recognised and recorded on the
/// parsed detection item.
#[test]
fn test_parse_neq_modifier() {
    let yaml = r#"
title: Neq Modifier
logsource:
    product: windows
detection:
    selection:
        Port|neq: 443
    condition: selection
level: medium
"#;
    let collection = parse_sigma_yaml(yaml).unwrap();
    // Document-level failures are collected in `errors`; check a rule exists
    // before indexing so a parse failure is reported with its cause.
    assert_eq!(collection.rules.len(), 1, "errors: {:?}", collection.errors);
    let rule = &collection.rules[0];
    let det = rule.detection.named.get("selection").unwrap();
    match det {
        crate::ast::Detection::AllOf(items) => {
            assert!(items[0].field.modifiers.contains(&Modifier::Neq));
        }
        _ => panic!("Expected AllOf detection"),
    }
}
1665
/// The `utf16be` modifier is recognised and can be chained with `base64`;
/// both end up on the parsed detection item.
#[test]
fn test_parse_utf16be_modifier() {
    let yaml = r#"
title: Utf16be Modifier
logsource:
    product: windows
detection:
    selection:
        Payload|utf16be|base64: 'data'
    condition: selection
level: medium
"#;
    let collection = parse_sigma_yaml(yaml).unwrap();
    // Document-level failures are collected in `errors`; check a rule exists
    // before indexing so a parse failure is reported with its cause.
    assert_eq!(collection.rules.len(), 1, "errors: {:?}", collection.errors);
    let rule = &collection.rules[0];
    let det = rule.detection.named.get("selection").unwrap();
    match det {
        crate::ast::Detection::AllOf(items) => {
            assert!(items[0].field.modifiers.contains(&Modifier::Utf16be));
            assert!(items[0].field.modifiers.contains(&Modifier::Base64));
        }
        _ => panic!("Expected AllOf detection"),
    }
}
1689
/// The `utf16` modifier is recognised and can be chained with `base64`;
/// both end up on the parsed detection item.
#[test]
fn test_parse_utf16_modifier() {
    let yaml = r#"
title: Utf16 BOM Modifier
logsource:
    product: windows
detection:
    selection:
        Payload|utf16|base64: 'data'
    condition: selection
level: medium
"#;
    let collection = parse_sigma_yaml(yaml).unwrap();
    // Document-level failures are collected in `errors`; check a rule exists
    // before indexing so a parse failure is reported with its cause.
    assert_eq!(collection.rules.len(), 1, "errors: {:?}", collection.errors);
    let rule = &collection.rules[0];
    let det = rule.detection.named.get("selection").unwrap();
    match det {
        crate::ast::Detection::AllOf(items) => {
            assert!(items[0].field.modifiers.contains(&Modifier::Utf16));
            assert!(items[0].field.modifiers.contains(&Modifier::Base64));
        }
        _ => panic!("Expected AllOf detection"),
    }
}
1713
/// `action: reset` discards the global template: the rule before the reset is
/// completed from the template, the rule after it only has its own fields.
#[test]
fn test_action_reset_clears_global() {
    let input = r#"
action: global
title: Global Template
logsource:
    product: windows
level: high
---
detection:
    selection:
        EventID: 1
    condition: selection
---
action: reset
---
title: After Reset
logsource:
    product: linux
detection:
    selection:
        command: ls
    condition: selection
level: low
"#;
    let parsed = parse_sigma_yaml(input).unwrap();
    assert!(parsed.errors.is_empty(), "errors: {:?}", parsed.errors);
    assert_eq!(parsed.rules.len(), 2);

    // First rule: title, logsource and level all come from the global template.
    let templated = &parsed.rules[0];
    assert_eq!(templated.title, "Global Template");
    assert_eq!(templated.logsource.product.as_deref(), Some("windows"));
    assert_eq!(templated.level, Some(Level::High));

    // Second rule: nothing from the template leaks through after the reset.
    let standalone = &parsed.rules[1];
    assert_eq!(standalone.title, "After Reset");
    assert_eq!(standalone.logsource.product.as_deref(), Some("linux"));
    assert_eq!(standalone.level, Some(Level::Low));
}
1765
/// Exercises `global`, `repeat` and `reset` in one stream: rules A and B get
/// the template's values, rule C (after the reset) keeps only its own.
#[test]
fn test_global_repeat_reset_combined() {
    let input = r#"
action: global
logsource:
    product: windows
level: medium
---
title: Rule A
detection:
    selection:
        EventID: 1
    condition: selection
---
action: repeat
title: Rule B
detection:
    selection:
        EventID: 2
    condition: selection
---
action: reset
---
title: Rule C
logsource:
    product: linux
detection:
    selection:
        command: cat
    condition: selection
level: low
"#;
    let parsed = parse_sigma_yaml(input).unwrap();
    assert!(parsed.errors.is_empty(), "errors: {:?}", parsed.errors);
    assert_eq!(parsed.rules.len(), 3);

    // (title, logsource product, level) expected for each rule, in order.
    let expectations = [
        ("Rule A", "windows", Level::Medium),
        ("Rule B", "windows", Level::Medium),
        ("Rule C", "linux", Level::Low),
    ];
    for (rule, (title, product, level)) in parsed.rules.iter().zip(expectations) {
        assert_eq!(rule.title, title);
        assert_eq!(rule.logsource.product.as_deref(), Some(product));
        assert_eq!(rule.level, Some(level));
    }
}
1830
/// A chain of three repeats: each rule takes the level it declares, and a
/// repeat that declares none (the fourth) inherits the level of the rule just
/// before it. The logsource is carried through unchanged.
#[test]
fn test_deep_repeat_chain() {
    let input = r#"
title: Base
logsource:
    product: windows
    category: process_creation
level: low
detection:
    selection:
        CommandLine|contains: 'cmd'
    condition: selection
---
action: repeat
title: Second
level: medium
detection:
    selection:
        CommandLine|contains: 'powershell'
    condition: selection
---
action: repeat
title: Third
level: high
detection:
    selection:
        CommandLine|contains: 'wscript'
    condition: selection
---
action: repeat
title: Fourth
detection:
    selection:
        CommandLine|contains: 'cscript'
    condition: selection
"#;
    let parsed = parse_sigma_yaml(input).unwrap();
    assert!(parsed.errors.is_empty(), "errors: {:?}", parsed.errors);
    assert_eq!(parsed.rules.len(), 4);

    let expected_levels = [Level::Low, Level::Medium, Level::High, Level::High];
    for (rule, wanted) in parsed.rules.iter().zip(expected_levels) {
        assert_eq!(rule.level, Some(wanted));
    }

    parsed.rules.iter().for_each(|rule| {
        assert_eq!(rule.logsource.product.as_deref(), Some("windows"));
        assert_eq!(
            rule.logsource.category.as_deref(),
            Some("process_creation")
        );
    });
}
1890
/// A stream with one valid and one invalid document still returns `Ok`: the
/// valid rule is kept and the failure is recorded in `errors`.
#[test]
fn test_collect_errors_mixed_valid_invalid() {
    let input = r#"
title: Valid Rule
logsource:
    category: test
detection:
    selection:
        field: value
    condition: selection
level: low
---
title: Invalid Rule
detection:
    selection:
        field: value
"#;
    let parsed = parse_sigma_yaml(input).unwrap();

    assert_eq!(parsed.rules.len(), 1);
    let survivor = &parsed.rules[0];
    assert_eq!(survivor.title, "Valid Rule");

    let any_errors = !parsed.errors.is_empty();
    assert!(any_errors, "Expected errors for invalid doc");
}
1917
/// `action: reset` does not forget the previous document: a repeat placed
/// after a reset still inherits logsource and level from the last rule.
#[test]
fn test_reset_followed_by_repeat_inherits_previous() {
    let input = r#"
title: Base
logsource:
    category: test
detection:
    selection:
        field: val
    condition: selection
level: low
---
action: reset
---
action: repeat
title: Repeated After Reset
detection:
    selection:
        field: val2
    condition: selection
"#;
    let parsed = parse_sigma_yaml(input).unwrap();
    assert!(parsed.errors.is_empty(), "errors: {:?}", parsed.errors);
    assert_eq!(parsed.rules.len(), 2);

    let (base, repeated) = (&parsed.rules[0], &parsed.rules[1]);
    assert_eq!(base.title, "Base");
    assert_eq!(repeated.title, "Repeated After Reset");
    assert_eq!(repeated.logsource.category.as_deref(), Some("test"));
    assert_eq!(repeated.level, Some(Level::Low));
}
1958
/// Merging a rule onto a global template is recursive for nested maps: the
/// rule overrides only `logsource.service`, while `product` and `category`
/// survive from the template.
#[test]
fn test_deep_merge_nested_maps() {
    let input = r#"
action: global
logsource:
    product: windows
    service: sysmon
    category: process_creation
---
title: Override Service
logsource:
    service: security
detection:
    selection:
        EventID: 1
    condition: selection
level: low
"#;
    let parsed = parse_sigma_yaml(input).unwrap();
    assert!(parsed.errors.is_empty(), "errors: {:?}", parsed.errors);
    assert_eq!(parsed.rules.len(), 1);

    let logsource = &parsed.rules[0].logsource;
    assert_eq!(logsource.product.as_deref(), Some("windows"));
    assert_eq!(logsource.service.as_deref(), Some("security"));
    assert_eq!(logsource.category.as_deref(), Some("process_creation"));
}
1994
/// A condition written as a folded YAML block scalar (`>-`) spanning several
/// lines parses without errors.
#[test]
fn test_line_feed_in_condition() {
    let input = r#"
title: Line Feed Condition rule
logsource:
    product: windows
detection:
    selection:
        Payload: 'data'
    replication_guid:
        Payload: 'guid'
    filter_machine_account:
        Payload: 'value'
    filter_known_service_accounts:
        Payload: 'value'
    filter_msol_prefix:
        Payload: 'value'
    filter_nt_authority_prefix:
        Payload: 'value'
    condition: >-
        selection and replication_guid
        and not (filter_machine_account or filter_known_service_accounts
        or filter_msol_prefix or filter_nt_authority_prefix)
level: medium
"#;
    let parsed = parse_sigma_yaml(input).unwrap();
    assert!(parsed.errors.is_empty(), "errors: {:?}", parsed.errors);
    assert_eq!(parsed.rules.len(), 1);
}
2028
/// Top-level keys that are not part of the rule schema are gathered into
/// `custom_attributes` with their YAML values intact, while the schema keys
/// themselves are not duplicated there.
#[test]
fn test_parse_detection_rule_custom_attributes_arbitrary_keys() {
    let input = r#"
title: Test Rule With Custom Attrs
logsource:
    product: windows
    category: process_creation
detection:
    selection:
        CommandLine|contains: 'whoami'
    condition: selection
level: medium
my_custom_field: some_value
severity_score: 42
organization: ACME Corp
custom_list:
    - item1
    - item2
custom_object:
    key1: val1
    key2: val2
"#;
    let parsed = parse_sigma_yaml(input).unwrap();
    assert_eq!(parsed.rules.len(), 1);

    let rule = &parsed.rules[0];
    assert_eq!(rule.title, "Test Rule With Custom Attrs");

    let attrs = &rule.custom_attributes;

    // Scalar values keep their YAML type.
    assert_eq!(
        attrs.get("my_custom_field"),
        Some(&Value::String("some_value".to_string()))
    );
    assert_eq!(attrs.get("severity_score").and_then(Value::as_u64), Some(42));
    assert_eq!(
        attrs.get("organization"),
        Some(&Value::String("ACME Corp".to_string()))
    );

    // Structured values are stored as sequences / mappings.
    assert!(attrs.get("custom_list").unwrap().is_sequence());
    assert!(attrs.get("custom_object").unwrap().is_mapping());

    // Schema keys must not show up among the custom attributes.
    for reserved in ["title", "logsource", "detection", "level", "custom_attributes"] {
        assert!(!attrs.contains_key(reserved));
    }
}
2084
/// A rule that uses only schema keys ends up with an empty
/// `custom_attributes` map.
#[test]
fn test_parse_detection_rule_no_custom_attributes() {
    let yaml = r#"
title: Standard Rule
logsource:
    category: test
detection:
    selection:
        field: value
    condition: selection
level: low
"#;
    let collection = parse_sigma_yaml(yaml).unwrap();
    // Document-level failures are collected in `errors`; check a rule exists
    // before indexing so a parse failure is reported with its cause.
    assert_eq!(collection.rules.len(), 1, "errors: {:?}", collection.errors);
    let rule = &collection.rules[0];
    assert!(rule.custom_attributes.is_empty());
}
2101
/// Entries of an explicit `custom_attributes:` mapping are lifted into the
/// rule's `custom_attributes`; the wrapper key itself is not stored.
#[test]
fn test_parse_detection_rule_custom_attributes_explicit_block() {
    let yaml = r#"
title: Rule With Custom Attrs
custom_attributes:
    rsigma.suppress: 5m
    rsigma.action: reset
logsource:
    category: test
detection:
    selection:
        field: value
    condition: selection
level: low
"#;
    let collection = parse_sigma_yaml(yaml).unwrap();
    // Document-level failures are collected in `errors`; check a rule exists
    // before indexing so a parse failure is reported with its cause.
    assert_eq!(collection.rules.len(), 1, "errors: {:?}", collection.errors);
    let rule = &collection.rules[0];
    assert_eq!(
        rule.custom_attributes
            .get("rsigma.suppress")
            .and_then(Value::as_str),
        Some("5m")
    );
    assert_eq!(
        rule.custom_attributes
            .get("rsigma.action")
            .and_then(Value::as_str),
        Some("reset")
    );
    assert!(!rule.custom_attributes.contains_key("custom_attributes"));
}
2134
/// When the same key appears both at the top level and inside the explicit
/// `custom_attributes:` block, the value from the explicit block wins.
#[test]
fn test_parse_detection_rule_custom_attributes_explicit_overrides_toplevel() {
    let yaml = r#"
title: Merge Test
priority: top
custom_attributes:
    priority: explicit
logsource:
    category: test
detection:
    selection:
        field: value
    condition: selection
"#;
    let collection = parse_sigma_yaml(yaml).unwrap();
    // Document-level failures are collected in `errors`; check a rule exists
    // before indexing so a parse failure is reported with its cause.
    assert_eq!(collection.rules.len(), 1, "errors: {:?}", collection.errors);
    let rule = &collection.rules[0];
    assert_eq!(
        rule.custom_attributes
            .get("priority")
            .and_then(Value::as_str),
        Some("explicit")
    );
}
2160
/// On a correlation rule, unknown top-level keys are collected into
/// `custom_attributes`, while every schema key (including the correlation
/// metadata keys) is kept out of it.
#[test]
fn test_parse_correlation_rule_custom_attributes_arbitrary_keys() {
    let input = r#"
title: Login
id: login-rule
logsource:
    category: auth
detection:
    selection:
        EventType: login
    condition: selection
---
title: Many Logins
name: reserved_name
tags:
    - test.tag
taxonomy: test.taxonomy
falsepositives:
    - benign activity
generate: false
my_custom_correlation_field: custom_value
priority: high_priority
correlation:
    type: event_count
    rules:
        - login-rule
    group-by:
        - User
    timespan: 60s
    condition:
        gte: 3
level: high
"#;
    let parsed = parse_sigma_yaml(input).unwrap();
    assert_eq!(parsed.correlations.len(), 1);

    let attrs = &parsed.correlations[0].custom_attributes;
    assert_eq!(
        attrs.get("my_custom_correlation_field"),
        Some(&Value::String("custom_value".to_string()))
    );
    assert_eq!(
        attrs.get("priority"),
        Some(&Value::String("high_priority".to_string()))
    );

    // None of the schema keys may leak into the custom attributes.
    let reserved_keys = [
        "title",
        "correlation",
        "level",
        "id",
        "name",
        "tags",
        "taxonomy",
        "falsepositives",
        "generate",
        "custom_attributes",
    ];
    for reserved in reserved_keys {
        assert!(!attrs.contains_key(reserved));
    }
}
2218
/// Top-level metadata on a correlation rule (`name`, `tags`, `taxonomy`,
/// `falsepositives`, `generate`) is parsed into the matching struct fields.
#[test]
fn test_parse_correlation_rule_schema_top_level_metadata() {
    let input = r#"
title: Login
id: login-rule
logsource:
    category: auth
detection:
    selection:
        EventType: login
    condition: selection
---
title: Many Logins
name: bucket_enum_corr
tags:
    - attack.collection
taxonomy: enterprise_attack
falsepositives:
    - Scheduled backups
generate: true
correlation:
    type: event_count
    rules:
        - login-rule
    group-by:
        - User
    timespan: 60s
    condition:
        gte: 3
level: high
"#;
    let parsed = parse_sigma_yaml(input).unwrap();
    assert_eq!(parsed.correlations.len(), 1);

    let many_logins = &parsed.correlations[0];
    assert_eq!(many_logins.name.as_deref(), Some("bucket_enum_corr"));
    assert_eq!(many_logins.tags, vec!["attack.collection"]);
    assert_eq!(many_logins.taxonomy.as_deref(), Some("enterprise_attack"));
    assert_eq!(many_logins.falsepositives, vec!["Scheduled backups"]);
    assert!(many_logins.generate);
}
2259
/// `generate: true` placed inside the `correlation:` mapping (rather than at
/// the top level) still sets the correlation's `generate` flag.
#[test]
fn test_parse_correlation_generate_nested_fallback() {
    let yaml = r#"
title: Nested Gen
correlation:
    type: temporal
    rules:
        - a
    group-by:
        - x
    timespan: 1m
    generate: true
"#;
    let collection = parse_sigma_yaml(yaml).unwrap();
    // Document-level failures are collected in `errors`; confirm the
    // correlation was produced before indexing so a parse failure is reported
    // with its cause instead of an out-of-bounds panic.
    assert_eq!(
        collection.correlations.len(),
        1,
        "errors: {:?}",
        collection.errors
    );
    assert!(collection.correlations[0].generate);
}
2276}