use std::collections::HashMap;
use yaml_serde::Value;
use crate::ast::*;
use crate::condition::parse_condition;
use crate::error::{Result, SigmaParserError};
use crate::fieldpath::{ends_with_unescaped, escape_brackets, first_unescaped};
use crate::value::SigmaValue;
use super::{
collect_custom_attributes, get_str, get_str_list, parse_enum_with_warn, parse_logsource,
parse_related, parse_sigma_version, val_key,
};
pub(super) fn parse_detection_rule(value: &Value, warnings: &mut Vec<String>) -> Result<SigmaRule> {
let m = value
.as_mapping()
.ok_or_else(|| SigmaParserError::InvalidRule("Expected a YAML mapping".into()))?;
let title = get_str(m, "title")
.ok_or_else(|| SigmaParserError::MissingField("title".into()))?
.to_string();
let sigma_version = parse_sigma_version(m, warnings);
let detection_val = m
.get(val_key("detection"))
.ok_or_else(|| SigmaParserError::MissingField("detection".into()))?;
let detection = parse_detections(
detection_val,
crate::version::array_matching_enabled(sigma_version),
)?;
let logsource = m
.get(val_key("logsource"))
.map(parse_logsource)
.transpose()?
.unwrap_or_default();
let standard_rule_keys: &[&str] = &[
"title",
"sigma-version",
"id",
"related",
"name",
"taxonomy",
"status",
"description",
"license",
"author",
"references",
"date",
"modified",
"logsource",
"detection",
"fields",
"falsepositives",
"level",
"tags",
"scope",
"custom_attributes",
];
let custom_attributes = collect_custom_attributes(m, standard_rule_keys);
Ok(SigmaRule {
title,
logsource,
detection,
sigma_version,
id: get_str(m, "id").map(|s| s.to_string()),
name: get_str(m, "name").map(|s| s.to_string()),
related: parse_related(m.get(val_key("related")), warnings),
taxonomy: get_str(m, "taxonomy").map(|s| s.to_string()),
status: parse_enum_with_warn(get_str(m, "status"), "status", warnings),
description: get_str(m, "description").map(|s| s.to_string()),
license: get_str(m, "license").map(|s| s.to_string()),
author: get_str(m, "author").map(|s| s.to_string()),
references: get_str_list(m, "references"),
date: get_str(m, "date").map(|s| s.to_string()),
modified: get_str(m, "modified").map(|s| s.to_string()),
fields: get_str_list(m, "fields"),
falsepositives: get_str_list(m, "falsepositives"),
level: parse_enum_with_warn(get_str(m, "level"), "level", warnings),
tags: get_str_list(m, "tags"),
scope: get_str_list(m, "scope"),
custom_attributes,
})
}
/// Parses the `detection` section of a rule.
///
/// The section must be a mapping with a `condition` entry that is either a
/// single string or a list of strings; each string is parsed with
/// `parse_condition`. An optional `timeframe` string is copied through. Every
/// other entry becomes a named detection parsed by `parse_detection`.
///
/// A key that is not a string is treated as the empty name `""`.
///
/// # Errors
/// - `InvalidDetection` when the section is not a mapping, when `condition`
///   is neither a string nor a list, or when a list item is not a string.
/// - `MissingField` when `condition` is absent.
/// - Any error from `parse_condition` or `parse_detection`.
pub(super) fn parse_detections(value: &Value, array_matching: bool) -> Result<Detections> {
    let Some(m) = value.as_mapping() else {
        return Err(SigmaParserError::InvalidDetection(
            "Detection section must be a mapping".into(),
        ));
    };
    let Some(condition_val) = m.get(val_key("condition")) else {
        return Err(SigmaParserError::MissingField("condition".into()));
    };

    // Normalise the single-string and list forms into one list of sources.
    let condition_strings: Vec<String> = match condition_val {
        Value::String(single) => vec![single.clone()],
        Value::Sequence(seq) => seq
            .iter()
            .map(|v| {
                v.as_str().map(|s| s.to_string()).ok_or_else(|| {
                    SigmaParserError::InvalidDetection(format!(
                        "condition list items must be strings, got: {v:?}"
                    ))
                })
            })
            .collect::<Result<Vec<_>>>()?,
        _ => {
            return Err(SigmaParserError::InvalidDetection(
                "condition must be a string or list of strings".into(),
            ));
        }
    };

    let mut conditions: Vec<ConditionExpr> = Vec::with_capacity(condition_strings.len());
    for source in &condition_strings {
        conditions.push(parse_condition(source)?);
    }

    let timeframe = get_str(m, "timeframe").map(ToString::to_string);

    // Everything that is not `condition` / `timeframe` is a named detection.
    let named = m
        .iter()
        .map(|(key, val)| (key.as_str().unwrap_or(""), val))
        .filter(|&(name, _)| name != "condition" && name != "timeframe")
        .map(|(name, val)| {
            parse_detection(val, array_matching).map(|parsed| (name.to_string(), parsed))
        })
        .collect::<Result<HashMap<_, _>>>()?;

    Ok(Detections {
        named,
        conditions,
        condition_strings,
        timeframe,
    })
}
/// Parses one detection value (the body of a named detection or a nested
/// element of one).
///
/// - A mapping is parsed entry by entry via `parse_map_entry` and merged with
///   `combine_entries`.
/// - A sequence containing no mappings or sequences becomes
///   `Detection::Keywords`; otherwise each element is parsed recursively and
///   the results are wrapped in `Detection::AnyOf`.
/// - Any other value becomes a single-element `Detection::Keywords`.
///
/// A mapping key that is not a string is passed on as the empty key `""`.
fn parse_detection(value: &Value, array_matching: bool) -> Result<Detection> {
    if let Value::Mapping(m) = value {
        let mut items = Vec::new();
        let mut blocks = Vec::new();
        for (k, v) in m.iter() {
            let entry_key = k.as_str().unwrap_or("");
            match parse_map_entry(entry_key, v, array_matching)? {
                ParsedEntry::Block(block) => blocks.push(block),
                ParsedEntry::Item(item) => items.push(item),
            }
        }
        return Ok(combine_entries(items, blocks));
    }

    let Value::Sequence(seq) = value else {
        // Scalar: a lone keyword.
        return Ok(Detection::Keywords(vec![SigmaValue::from_yaml(value)]));
    };

    let has_nested = seq.iter().any(|v| v.is_mapping() || v.is_sequence());
    if has_nested {
        let mut subs: Vec<Detection> = Vec::with_capacity(seq.len());
        for element in seq {
            subs.push(parse_detection(element, array_matching)?);
        }
        Ok(Detection::AnyOf(subs))
    } else {
        Ok(Detection::Keywords(
            seq.iter().map(SigmaValue::from_yaml).collect(),
        ))
    }
}
/// Builds a [`DetectionItem`] from a mapping key and its value.
///
/// The key is parsed with `parse_field_spec`. A sequence value contributes one
/// `SigmaValue` per element; any other value contributes exactly one. Each
/// value is converted with `to_sigma_value`, which takes the field's modifiers
/// into account.
///
/// # Errors
/// Propagates any error from `parse_field_spec`.
fn parse_detection_item(key: &str, value: &Value) -> Result<DetectionItem> {
    let field = parse_field_spec(key)?;
    let values = if let Value::Sequence(seq) = value {
        let mut converted = Vec::with_capacity(seq.len());
        for element in seq {
            converted.push(to_sigma_value(element, &field));
        }
        converted
    } else {
        vec![to_sigma_value(value, &field)]
    };
    Ok(DetectionItem { field, values })
}
/// One dot-separated segment of a field path, as produced by
/// `parse_field_path`.
struct PathSegment {
    /// Segment text before any bracket selector (or the whole raw segment when
    /// it carries no selector). Stored exactly as written in the key.
    name: String,
    /// Integer selector from `name[N]`, if the segment had one.
    index: Option<i64>,
    /// Quantifier selector (`[any]`, `[all]`, `[all_or_empty]`, `[none]`), if
    /// the segment had one.
    quantifier: Option<ArrayQuantifier>,
}

impl PathSegment {
    /// Renders the segment as `name[N]` when an index is present, otherwise as
    /// the bare `name`. The quantifier is never included.
    fn path_str(&self) -> String {
        if let Some(i) = self.index {
            format!("{}[{i}]", self.name)
        } else {
            self.name.clone()
        }
    }
}
/// Outcome of parsing a single key/value entry of a detection mapping
/// (returned by `parse_map_entry`).
enum ParsedEntry {
/// A plain field condition; `combine_entries` groups these into one
/// `Detection::AllOf`.
Item(DetectionItem),
/// A nested detection, e.g. a `Detection::ArrayMatch` or the result of
/// `parse_block_with_prefix`.
Block(Detection),
}
/// Merges the plain items and nested blocks collected from one mapping into a
/// single [`Detection`].
///
/// - No blocks: `Detection::AllOf(items)` (also when `items` is empty).
/// - No items and exactly one block: that block, unwrapped.
/// - Otherwise: `Detection::And` of the `AllOf(items)` part (only if there are
///   items) followed by all blocks in order.
fn combine_entries(items: Vec<DetectionItem>, mut blocks: Vec<Detection>) -> Detection {
    match (items.is_empty(), blocks.len()) {
        (_, 0) => Detection::AllOf(items),
        (true, 1) => blocks.pop().expect("len checked"),
        (true, _) => Detection::And(blocks),
        (false, count) => {
            let mut parts: Vec<Detection> = Vec::with_capacity(count + 1);
            parts.push(Detection::AllOf(items));
            parts.append(&mut blocks);
            Detection::And(parts)
        }
    }
}
/// Parses one `key: value` entry of a detection mapping.
///
/// The key is split at the first `|` into a field path and an optional
/// modifier chain.
///
/// - Empty field path: parsed as a plain item from the original key.
/// - `array_matching` disabled: brackets in the field path are escaped with
///   `escape_brackets` and the entry is parsed as a plain item.
/// - A path segment carries a quantifier: the entry becomes a
///   `Detection::ArrayMatch` whose `field` is the path up to and including
///   that segment, and whose body is built from the remaining segments by
///   `build_block_body`.
/// - No quantifier, but an indexed segment and a mapping value: the mapping is
///   parsed by `parse_block_with_prefix` with the path as prefix.
///   NOTE(review): `modifier_part` is not forwarded in this branch, so
///   modifiers written on such a key are ignored — confirm this is intended.
/// - Anything else: parsed as a plain item from the original key.
///
/// # Errors
/// Propagates errors from `parse_detection_item`, `parse_field_path`,
/// `build_block_body` and `parse_block_with_prefix`.
fn parse_map_entry(key: &str, value: &Value, array_matching: bool) -> Result<ParsedEntry> {
    let mut halves = key.splitn(2, '|');
    let field_part = halves.next().unwrap_or(key);
    let modifier_part = halves.next();

    if field_part.is_empty() {
        return parse_detection_item(key, value).map(ParsedEntry::Item);
    }

    if !array_matching {
        // Array syntax is off: make every bracket literal before parsing.
        let escaped = escape_brackets(field_part);
        let plain_key = if let Some(m) = modifier_part {
            format!("{escaped}|{m}")
        } else {
            escaped.into_owned()
        };
        return parse_detection_item(&plain_key, value).map(ParsedEntry::Item);
    }

    let segments = parse_field_path(field_part)?;

    if let Some(idx) = segments.iter().position(|s| s.quantifier.is_some()) {
        let quantifier = segments[idx]
            .quantifier
            .expect("position found a quantifier");
        // `scope` ends at the quantified segment; `rest` is evaluated per member.
        let (scope, rest) = segments.split_at(idx + 1);
        let array_field = scope
            .iter()
            .map(PathSegment::path_str)
            .collect::<Vec<_>>()
            .join(".");
        let body = build_block_body(rest, modifier_part, value, array_matching)?;
        return Ok(ParsedEntry::Block(Detection::ArrayMatch {
            field: array_field,
            quantifier,
            body: Box::new(body),
        }));
    }

    if value.is_mapping() && segments.iter().any(|s| s.index.is_some()) {
        let prefix = reconstruct_key(&segments, None);
        parse_block_with_prefix(&prefix, value, array_matching).map(ParsedEntry::Block)
    } else {
        parse_detection_item(key, value).map(ParsedEntry::Item)
    }
}
/// Builds the per-member body of an array-match block.
///
/// `remaining` holds the path segments that follow the quantified segment,
/// `modifier_part` is the modifier chain written on the original key (if any)
/// and `value` is the YAML value of the entry.
///
/// - Mapping value, no remaining segments: parsed by
///   `parse_extended_block_body` when the mapping has a `condition` key,
///   otherwise by `parse_detection`.
/// - Mapping value, remaining segments: parsed by `parse_block_with_prefix`
///   with the remaining path as prefix.
/// - Non-mapping value, no remaining segments: a single field-less
///   `DetectionItem` carrying the parsed modifiers.
/// - Non-mapping value, remaining segments: the remaining path (plus
///   modifiers) is re-parsed through `parse_map_entry`.
///
/// # Errors
/// - `InvalidFieldSpec` when a modifier chain is attached to a key whose value
///   is a mapping. This is rejected whether or not path segments remain;
///   previously the modifiers were silently discarded when segments remained.
/// - Any error from `parse_modifiers`, `parse_extended_block_body`,
///   `parse_detection`, `parse_block_with_prefix` or `parse_map_entry`.
fn build_block_body(
    remaining: &[PathSegment],
    modifier_part: Option<&str>,
    value: &Value,
    array_matching: bool,
) -> Result<Detection> {
    if let Value::Mapping(m) = value {
        // A modifier cannot apply to a whole object block, so reject it
        // instead of dropping it when the key also has a remaining path.
        if modifier_part.is_some() {
            return Err(SigmaParserError::InvalidFieldSpec(
                "value modifiers cannot be applied to an array object-scope block; \
                 move the modifier onto a field inside the block"
                    .into(),
            ));
        }
        if !remaining.is_empty() {
            let prefix = reconstruct_key(remaining, None);
            return parse_block_with_prefix(&prefix, value, array_matching);
        }
        return if m.iter().any(|(k, _)| k.as_str() == Some("condition")) {
            parse_extended_block_body(value, array_matching)
        } else {
            parse_detection(value, array_matching)
        };
    }

    if remaining.is_empty() {
        // Scalar or list matched directly against each array member.
        let modifiers = parse_modifiers(modifier_part)?;
        let field = FieldSpec::new(None, modifiers);
        let values = match value {
            Value::Sequence(seq) => seq.iter().map(|v| to_sigma_value(v, &field)).collect(),
            _ => vec![to_sigma_value(value, &field)],
        };
        return Ok(Detection::AllOf(vec![DetectionItem { field, values }]));
    }

    // Re-parse the remaining path so nested quantifiers and indexes are handled.
    let remaining_key = reconstruct_key(remaining, modifier_part);
    match parse_map_entry(&remaining_key, value, array_matching)? {
        ParsedEntry::Item(item) => Ok(Detection::AllOf(vec![item])),
        ParsedEntry::Block(block) => Ok(block),
    }
}
/// Parses an array block body that carries its own `condition`.
///
/// The `condition` entry is parsed with `parse_block_condition`; every other
/// entry becomes a named sub-selection parsed by `parse_detection`. The result
/// is `Detection::Conditional { named, condition }`.
///
/// # Errors
/// `InvalidDetection` when `value` is not a mapping, a key is not a string,
/// no `condition` entry exists, or there are no named sub-selections. Errors
/// from `parse_block_condition` and `parse_detection` are propagated.
fn parse_extended_block_body(value: &Value, array_matching: bool) -> Result<Detection> {
    let Some(m) = value.as_mapping() else {
        return Err(SigmaParserError::InvalidDetection(
            "extended array block body must be a mapping".into(),
        ));
    };

    let mut named: HashMap<String, Detection> = HashMap::new();
    let mut condition: Option<ConditionExpr> = None;
    for (k, v) in m.iter() {
        match k.as_str() {
            None => {
                return Err(SigmaParserError::InvalidDetection(
                    "non-string key in array block body".into(),
                ));
            }
            Some("condition") => condition = Some(parse_block_condition(v)?),
            Some(name) => {
                named.insert(name.to_string(), parse_detection(v, array_matching)?);
            }
        }
    }

    match condition {
        None => Err(SigmaParserError::InvalidDetection(
            "extended array block requires a 'condition'".into(),
        )),
        Some(_) if named.is_empty() => Err(SigmaParserError::InvalidDetection(
            "extended array block has a 'condition' but no named sub-selections".into(),
        )),
        Some(condition) => Ok(Detection::Conditional { named, condition }),
    }
}
/// Parses the `condition` entry of an extended array block.
///
/// A string is parsed directly with `parse_condition`. A list of strings is
/// parsed item by item and the results are wrapped in `ConditionExpr::Or`.
///
/// # Errors
/// `InvalidDetection` when the value is neither a string nor a list, or when
/// a list item is not a string. Errors from `parse_condition` are propagated.
fn parse_block_condition(value: &Value) -> Result<ConditionExpr> {
    let seq = match value {
        Value::String(s) => return parse_condition(s),
        Value::Sequence(seq) => seq,
        _ => {
            return Err(SigmaParserError::InvalidDetection(
                "array block 'condition' must be a string or list of strings".into(),
            ));
        }
    };

    let mut exprs = Vec::with_capacity(seq.len());
    for entry in seq {
        let Some(source) = entry.as_str() else {
            return Err(SigmaParserError::InvalidDetection(
                "array block 'condition' list items must be strings".into(),
            ));
        };
        exprs.push(parse_condition(source)?);
    }
    Ok(ConditionExpr::Or(exprs))
}
/// Parses a mapping whose keys are relative to `prefix`.
///
/// Each entry key is rewritten to `prefix.key` and parsed with
/// `parse_map_entry`; the results are merged with `combine_entries`. A key
/// that is not a string is treated as the empty string.
///
/// # Errors
/// `InvalidDetection` when `value` is not a mapping; errors from
/// `parse_map_entry` are propagated.
fn parse_block_with_prefix(prefix: &str, value: &Value, array_matching: bool) -> Result<Detection> {
    let Some(m) = value.as_mapping() else {
        return Err(SigmaParserError::InvalidDetection(
            "array block body must be a mapping".into(),
        ));
    };

    let mut items: Vec<DetectionItem> = Vec::new();
    let mut blocks: Vec<Detection> = Vec::new();
    for (k, v) in m.iter() {
        let sub = k.as_str().unwrap_or("");
        let qualified = format!("{prefix}.{sub}");
        let entry = parse_map_entry(&qualified, v, array_matching)?;
        match entry {
            ParsedEntry::Block(block) => blocks.push(block),
            ParsedEntry::Item(item) => items.push(item),
        }
    }
    Ok(combine_entries(items, blocks))
}
/// Splits a field path on `.` and decodes a trailing bracket selector on each
/// segment.
///
/// A segment is treated as `name[token]` only when it contains an unescaped
/// `[` and ends with an unescaped `]`; otherwise the raw segment is kept as a
/// plain name. `token` may be `any`, `all`, `all_or_empty`, `none`, or an
/// integer accepted by `str::parse::<i64>`.
///
/// # Errors
/// `InvalidFieldSpec` when a selector has no field name in front of it, or
/// when the token is neither a known quantifier nor an integer.
fn parse_field_path(field_part: &str) -> Result<Vec<PathSegment>> {
    let mut segments = Vec::new();
    for raw in field_part.split('.') {
        let open = match first_unescaped(raw, b'[') {
            Some(pos) if ends_with_unescaped(raw, b']') => pos,
            _ => {
                // No selector: keep the segment verbatim.
                segments.push(PathSegment {
                    name: raw.to_string(),
                    index: None,
                    quantifier: None,
                });
                continue;
            }
        };

        let name = &raw[..open];
        let token = &raw[open + 1..raw.len() - 1];
        if name.is_empty() {
            return Err(SigmaParserError::InvalidFieldSpec(format!(
                "array selector without a field name in '{field_part}'"
            )));
        }

        let quantifier = match token {
            "any" => Some(ArrayQuantifier::Any),
            "all" => Some(ArrayQuantifier::All),
            "all_or_empty" => Some(ArrayQuantifier::AllOrEmpty),
            "none" => Some(ArrayQuantifier::None),
            _ => None,
        };
        // Only a token that is not a quantifier keyword is tried as an index.
        let index = if quantifier.is_some() {
            None
        } else {
            match token.parse::<i64>() {
                Ok(n) => Some(n),
                Err(_) => {
                    return Err(SigmaParserError::InvalidFieldSpec(format!(
                        "unknown array selector '[{token}]' in field '{field_part}'; \
                         only [any], [all], [all_or_empty], [none], and an integer index \
                         [N] (negative counts from the end) are supported; \
                         escape a literal bracket as \\[ or \\]"
                    )));
                }
            }
        };

        segments.push(PathSegment {
            name: name.to_string(),
            index,
            quantifier,
        });
    }
    Ok(segments)
}
/// Parses a `|`-separated modifier chain into a list of [`Modifier`]s.
///
/// `None` yields an empty list. Parsing stops at the first invalid entry.
///
/// # Errors
/// - `NotIsNotAModifier` when an entry is exactly `not`.
/// - `UnknownModifier` (carrying the offending text) when an entry does not
///   parse as a `Modifier`.
fn parse_modifiers(modifier_part: Option<&str>) -> Result<Vec<Modifier>> {
    let Some(part) = modifier_part else {
        return Ok(Vec::new());
    };
    part.split('|')
        .map(|mod_str| {
            if mod_str == "not" {
                Err(SigmaParserError::NotIsNotAModifier)
            } else {
                mod_str
                    .parse::<Modifier>()
                    .map_err(|_| SigmaParserError::UnknownModifier(mod_str.to_string()))
            }
        })
        .collect()
}
/// Rebuilds a key string from path segments and an optional modifier chain.
///
/// Segments are joined with `.`. A segment with a quantifier is rendered as
/// `name[quantifier]` using the quantifier's `Display` form; any other segment
/// is rendered by `PathSegment::path_str`. When `modifier_part` is present it
/// is appended after a `|`.
fn reconstruct_key(segments: &[PathSegment], modifier_part: Option<&str>) -> String {
    let mut rendered: Vec<String> = Vec::with_capacity(segments.len());
    for s in segments {
        let piece = if let Some(q) = s.quantifier {
            format!("{}[{q}]", s.name)
        } else {
            s.path_str()
        };
        rendered.push(piece);
    }
    let path = rendered.join(".");
    if let Some(m) = modifier_part {
        format!("{path}|{m}")
    } else {
        path
    }
}
/// Converts a YAML value into a [`SigmaValue`] for the given field.
///
/// A YAML string on a field that has `Modifier::Re` is built with
/// `SigmaValue::from_raw_string`; everything else goes through
/// `SigmaValue::from_yaml`.
// NOTE(review): presumably the raw form keeps regex text unprocessed —
// confirm against `SigmaValue::from_raw_string`.
fn to_sigma_value(v: &Value, field: &FieldSpec) -> SigmaValue {
    match v {
        Value::String(s) if field.has_modifier(Modifier::Re) => SigmaValue::from_raw_string(s),
        _ => SigmaValue::from_yaml(v),
    }
}
/// Parses a detection key of the form `field|modifier|modifier…` into a
/// [`FieldSpec`].
///
/// The text before the first `|` is the field name; an empty name or `.`
/// yields a spec without a field. Each following part is parsed as a
/// [`Modifier`]. An empty key yields a spec with no field and no modifiers.
///
/// # Errors
/// - `NotIsNotAModifier` when a modifier part is exactly `not`.
/// - `UnknownModifier` (carrying the offending text) when a part does not
///   parse as a `Modifier`.
pub fn parse_field_spec(key: &str) -> Result<FieldSpec> {
    if key.is_empty() {
        return Ok(FieldSpec::new(None, Vec::new()));
    }

    let mut parts = key.split('|');
    // `split` always yields at least one piece, so the fallback is never used.
    let field = match parts.next().unwrap_or("") {
        "" | "." => None,
        name => Some(name.to_string()),
    };

    let modifiers = parts
        .map(|mod_str| {
            if mod_str == "not" {
                Err(SigmaParserError::NotIsNotAModifier)
            } else {
                mod_str
                    .parse::<Modifier>()
                    .map_err(|_| SigmaParserError::UnknownModifier(mod_str.to_string()))
            }
        })
        .collect::<Result<Vec<_>>>()?;

    Ok(FieldSpec::new(field, modifiers))
}