mod correlation;
mod detection;
mod filter;
#[cfg(test)]
mod tests;
pub use detection::parse_field_spec;
use std::collections::HashMap;
use std::path::Path;
use serde::Deserialize;
use serde_yaml::Value;
use crate::ast::*;
use crate::error::{Result, SigmaParserError};
/// Parses a (possibly multi-document) Sigma YAML string into a [`SigmaCollection`].
///
/// Each YAML document must be a mapping. A top-level `action` key turns a
/// document into a collection directive instead of a rule:
///
/// * `global` – the document (minus `action`) becomes the template that is
///   merged underneath every following document.
/// * `reset`  – clears the current global template.
/// * `repeat` – the document (minus `action`) is merged over the previously
///   produced document, then over the global template, and parsed as a new
///   document.
///
/// Any other document is merged over the global template (if any) and parsed.
///
/// # Errors
///
/// Per-document problems (YAML syntax errors, non-mapping documents, unknown
/// or non-string actions, rule parse failures) are recorded in
/// `collection.errors` and do not fail the call. A YAML deserialization error
/// additionally stops processing of the remaining documents. The function
/// itself returns `Err` only when `deep_merge` fails.
pub fn parse_sigma_yaml(yaml: &str) -> Result<SigmaCollection> {
    /// Returns a copy of `doc` with its top-level `action` key removed.
    fn without_action(doc: &Value) -> Value {
        let mut stripped = doc.clone();
        if let Some(m) = stripped.as_mapping_mut() {
            m.remove(Value::String("action".to_string()));
        }
        stripped
    }

    /// Layers `doc` over the active global template, if there is one.
    fn apply_global(global: Option<&Value>, doc: Value) -> Result<Value> {
        match global {
            Some(template) => deep_merge(template.clone(), doc),
            None => Ok(doc),
        }
    }

    /// Parses a fully merged document and files the result (or the parse
    /// error) in the matching bucket of `collection`.
    fn record_document(collection: &mut SigmaCollection, merged: &Value) {
        match parse_document(merged) {
            Ok(SigmaDocument::Rule(rule)) => collection.rules.push(*rule),
            Ok(SigmaDocument::Correlation(corr)) => collection.correlations.push(corr),
            Ok(SigmaDocument::Filter(filter)) => collection.filters.push(filter),
            Err(e) => collection.errors.push(e.to_string()),
        }
    }

    let mut collection = SigmaCollection::new();
    let mut global: Option<Value> = None;
    let mut previous: Option<Value> = None;

    for doc in serde_yaml::Deserializer::from_str(yaml) {
        let value: Value = match Value::deserialize(doc) {
            Ok(v) => v,
            Err(e) => {
                // Stop at the first syntax error instead of trying to resume
                // inside a broken stream.
                collection.errors.push(format!("YAML parse error: {e}"));
                break;
            }
        };

        let Some(mapping) = value.as_mapping() else {
            collection
                .errors
                .push("Document is not a YAML mapping".to_string());
            continue;
        };

        // Collection directives never fall through to the regular rule path.
        if let Some(action_val) = mapping.get(Value::String("action".to_string())) {
            let Some(action) = action_val.as_str() else {
                collection.errors.push(format!(
                    "collection 'action' must be a string, got: {action_val:?}"
                ));
                continue;
            };

            match action {
                "global" => global = Some(without_action(&value)),
                "reset" => global = None,
                "repeat" => match previous.as_ref() {
                    Some(prev) => {
                        let repeated = deep_merge(prev.clone(), without_action(&value))?;
                        let final_val = apply_global(global.as_ref(), repeated)?;
                        record_document(&mut collection, &final_val);
                        // The repeated document becomes the base for the next `repeat`.
                        previous = Some(final_val);
                    }
                    None => collection
                        .errors
                        .push("'action: repeat' without a previous document".to_string()),
                },
                other => collection
                    .errors
                    .push(format!("Unknown collection action: {other}")),
            }
            continue;
        }

        let merged = apply_global(global.as_ref(), value)?;
        record_document(&mut collection, &merged);
        previous = Some(merged);
    }

    Ok(collection)
}
/// Reads the file at `path` as UTF-8 text and parses it with [`parse_sigma_yaml`].
///
/// # Errors
///
/// Returns the converted I/O error when the file cannot be read, or whatever
/// error [`parse_sigma_yaml`] returns.
pub fn parse_sigma_file(path: &Path) -> Result<SigmaCollection> {
    match std::fs::read_to_string(path) {
        Ok(text) => parse_sigma_yaml(&text),
        Err(io_err) => Err(io_err.into()),
    }
}
/// Recursively parses every `.yml` / `.yaml` file below `dir` into a single
/// [`SigmaCollection`].
///
/// Files that fail to load are not fatal: the failure is recorded in the
/// collection's `errors` list, prefixed with the file path.
///
/// # Errors
///
/// Returns `Err` when a directory cannot be listed or one of its entries
/// cannot be read.
pub fn parse_sigma_directory(dir: &Path) -> Result<SigmaCollection> {
    /// True when `candidate` ends in exactly `yml` or `yaml`.
    fn has_yaml_extension(candidate: &Path) -> bool {
        match candidate.extension().and_then(|ext| ext.to_str()) {
            Some("yml") | Some("yaml") => true,
            _ => false,
        }
    }

    /// Depth-first traversal that folds each parsed file into `acc`.
    fn visit(current: &Path, acc: &mut SigmaCollection) -> Result<()> {
        for dir_entry in std::fs::read_dir(current)? {
            let child = dir_entry?.path();

            if child.is_dir() {
                visit(&child, acc)?;
                continue;
            }
            if !has_yaml_extension(&child) {
                continue;
            }

            match parse_sigma_file(&child) {
                Err(e) => acc.errors.push(format!("{}: {e}", child.display())),
                Ok(parsed) => {
                    acc.rules.extend(parsed.rules);
                    acc.correlations.extend(parsed.correlations);
                    acc.filters.extend(parsed.filters);
                    acc.errors.extend(parsed.errors);
                }
            }
        }
        Ok(())
    }

    let mut collection = SigmaCollection::new();
    visit(dir, &mut collection)?;
    Ok(collection)
}
/// Dispatches a single merged YAML document to the matching rule parser.
///
/// A top-level `correlation` key selects the correlation parser, otherwise a
/// top-level `filter` key selects the filter parser, and everything else is
/// parsed as a detection rule.
///
/// # Errors
///
/// Returns `InvalidRule` when `value` is not a mapping, or the error produced
/// by the selected parser.
fn parse_document(value: &Value) -> Result<SigmaDocument> {
    let Some(mapping) = value.as_mapping() else {
        return Err(SigmaParserError::InvalidRule(
            "Document is not a YAML mapping".into(),
        ));
    };
    let has_key = |name: &str| mapping.contains_key(Value::String(name.into()));

    if has_key("correlation") {
        return correlation::parse_correlation_rule(value).map(SigmaDocument::Correlation);
    }
    if has_key("filter") {
        return filter::parse_filter_rule(value).map(SigmaDocument::Filter);
    }

    let rule = detection::parse_detection_rule(value)?;
    Ok(SigmaDocument::Rule(Box::new(rule)))
}
/// Gathers the non-standard attributes of a rule mapping.
///
/// Every string-keyed entry of `m` whose key is not listed in
/// `standard_keys` is copied into the result. If `m` also contains a
/// `custom_attributes` mapping, its string-keyed entries are added
/// afterwards and overwrite any entry with the same name.
pub(super) fn collect_custom_attributes(
    m: &serde_yaml::Mapping,
    standard_keys: &[&str],
) -> HashMap<String, Value> {
    let mut attrs: HashMap<String, Value> = HashMap::new();

    for (k, v) in m {
        // Non-string keys cannot become attribute names.
        let Some(name) = k.as_str() else {
            continue;
        };
        if standard_keys.contains(&name) {
            continue;
        }
        attrs.insert(name.to_string(), v.clone());
    }

    // Explicit entries are applied last so that they take precedence.
    if let Some(Value::Mapping(explicit)) = m.get(val_key("custom_attributes")) {
        attrs.extend(
            explicit
                .iter()
                .filter_map(|(k, v)| Some((k.as_str()?.to_string(), v.clone()))),
        );
    }

    attrs
}
/// Builds a [`LogSource`] from the `logsource` section of a rule.
///
/// `category`, `product`, `service` and `definition` are read as optional
/// strings. Every other non-empty string key is treated as a custom field:
/// string values are stored in `custom`, non-string values are skipped with a
/// warning.
///
/// # Errors
///
/// Returns `InvalidRule` when `value` is not a mapping.
pub(super) fn parse_logsource(value: &Value) -> Result<LogSource> {
    const KNOWN_KEYS: [&str; 4] = ["category", "product", "service", "definition"];

    let Some(m) = value.as_mapping() else {
        return Err(SigmaParserError::InvalidRule(
            "logsource must be a mapping".into(),
        ));
    };

    let mut custom = HashMap::new();
    for (k, v) in m {
        // Only non-empty string keys outside the known set are custom fields.
        let key_str = match k.as_str() {
            Some(name) if !name.is_empty() && !KNOWN_KEYS.contains(&name) => name,
            _ => continue,
        };

        if let Some(val_str) = v.as_str() {
            custom.insert(key_str.to_string(), val_str.to_string());
        } else {
            log::warn!(
                "logsource custom field '{key_str}' has non-string value ({v:?}), skipping"
            );
        }
    }

    let owned = |key: &str| get_str(m, key).map(str::to_string);
    Ok(LogSource {
        category: owned("category"),
        product: owned("product"),
        service: owned("service"),
        definition: owned("definition"),
        custom,
    })
}
/// Parses the `related` list of a rule.
///
/// Returns an empty vector when `value` is absent or not a sequence. Entries
/// that are not mappings, lack a string `id` or `type`, or whose `type` fails
/// to parse are skipped.
pub(super) fn parse_related(value: Option<&Value>) -> Vec<Related> {
    let Some(Value::Sequence(seq)) = value else {
        return Vec::new();
    };

    let mut related = Vec::new();
    for item in seq {
        let Some(m) = item.as_mapping() else {
            continue;
        };
        let (Some(id), Some(type_str)) = (get_str(m, "id"), get_str(m, "type")) else {
            continue;
        };
        let Ok(relation_type) = type_str.parse() else {
            continue;
        };
        related.push(Related {
            id: id.to_string(),
            relation_type,
        });
    }
    related
}
/// Wraps `s` in a `Value::String` so it can be used as a YAML mapping key.
pub(super) fn val_key(s: &str) -> Value {
    let owned = String::from(s);
    Value::String(owned)
}
/// Looks up `key` in `m` and returns its value as a string slice.
///
/// Returns `None` when the key is missing or `as_str` yields nothing for the
/// stored value.
pub(super) fn get_str<'a>(m: &'a serde_yaml::Mapping, key: &str) -> Option<&'a str> {
    let found = m.get(val_key(key))?;
    found.as_str()
}
/// Reads `key` from `m` as a list of strings.
///
/// A single string value yields a one-element list; a sequence yields its
/// string elements (non-string elements are dropped); anything else, including
/// a missing key, yields an empty list.
pub(super) fn get_str_list(m: &serde_yaml::Mapping, key: &str) -> Vec<String> {
    let Some(entry) = m.get(val_key(key)) else {
        return Vec::new();
    };

    match entry {
        Value::String(single) => vec![single.to_owned()],
        Value::Sequence(items) => {
            let mut strings = Vec::new();
            for item in items {
                if let Some(text) = item.as_str() {
                    strings.push(text.to_string());
                }
            }
            strings
        }
        _ => Vec::new(),
    }
}
/// Recursively merges `src` on top of `dest` and returns the result.
///
/// When both values are mappings, keys from `src` are inserted into `dest`;
/// if a key exists on both sides and both values are mappings they are merged
/// recursively, otherwise the `src` value replaces the `dest` value. When
/// either top-level value is not a mapping, `src` is returned unchanged.
///
/// # Errors
///
/// Returns `MergeTooDeep` when the mapping-into-mapping recursion goes deeper
/// than `MAX_DEPTH` nested levels.
fn deep_merge(dest: Value, src: Value) -> crate::error::Result<Value> {
    const MAX_DEPTH: usize = 64;

    /// Merges `overlay` into `base` in place; `depth` is the current nesting level.
    fn merge_level(
        base: &mut serde_yaml::Mapping,
        overlay: serde_yaml::Mapping,
        depth: usize,
    ) -> crate::error::Result<()> {
        if depth > MAX_DEPTH {
            return Err(crate::error::SigmaParserError::MergeTooDeep(MAX_DEPTH));
        }

        for (key, incoming) in overlay {
            // Take any existing value out first, then put the resolved value back.
            let resolved = match (base.remove(&key), incoming) {
                (Some(Value::Mapping(mut nested)), Value::Mapping(nested_overlay)) => {
                    merge_level(&mut nested, nested_overlay, depth + 1)?;
                    Value::Mapping(nested)
                }
                (_, replacement) => replacement,
            };
            base.insert(key, resolved);
        }
        Ok(())
    }

    match (dest, src) {
        (Value::Mapping(mut root), Value::Mapping(overlay)) => {
            merge_level(&mut root, overlay, 0)?;
            Ok(Value::Mapping(root))
        }
        (_, src) => Ok(src),
    }
}