usiem/components/rule/
sigma.rs

1use std::{borrow::Cow, collections::BTreeMap, fmt::Display};
2
3use serde::{Deserialize, Serialize};
4
5use crate::prelude::{
6    mitre::{MitreTactics, MitreTechniques},
7    types::LogString,
8    AlertSeverity, SiemField,
9};
10
11use super::{
12    AlertContent, AlertGenerator, MitreInfo, RuleCondition, RuleOperator, SiemRule, SiemSubRule,
13};
14
15#[derive(Clone, Default, Debug, Serialize, Deserialize)]
16pub struct SigmaRule {
17    /// A brief title for the rule that should contain what the rules is supposed to detect (max. 256 characters)
18    pub title: LogString,
19    /// Sigma rules should be identified by a globally unique identifier in the id attribute. For this purpose random generated UUIDs (version 4) are recommended but not mandatory. An example for this is:
20    /// ```yml
21    /// title: Test rule
22    /// id: 929a690e-bef0-4204-a928-ef5e620d6fcc
23    /// ```
24    ///
25    /// Rule identifiers can and should change for the following reasons:
26    /// - Major changes in the rule. E.g. a different rule logic.
27    /// - Derivation of a new rule from an existing or refinement of a rule in a way that both are kept active.
28    /// - Merge of rules.
29    ///
30    /// To being able to keep track on relationships between detections, Sigma rules may also contain references to related rule identifiers in the related attribute. This allows to define common relationships between detections as follows:
31    ///
32    /// ```yml
33    /// related:
34    /// - id: 08fbc97d-0a2f-491c-ae21-8ffcfd3174e9
35    ///   type: derived
36    /// - id: 929a690e-bef0-4204-a928-ef5e620d6fcc
37    ///   type: obsoletes
38    /// ```
39    ///Currently the following types are defined:
40    /// - derived: Rule was derived from the referred rule or rules, which may remain active.
41    /// - obsoletes: Rule obsoletes the referred rule or rules, which aren't used anymore.
42    /// - merged: Rule was merged from the referred rules. The rules may be still existing and in use.
43    /// - renamed: The rule had previously the referred identifier or identifiers but was renamed for any other reason, e.g. from a private naming scheme to UUIDs, to resolve collisions etc. It's not expected that a rule with this id exists anymore.
44    ///
45    #[serde(skip_serializing_if = "Option::is_none")]
46    pub id: Option<LogString>,
47    /// A short description of the rule and the malicious activity that can be detected (max. 65,535 characters)
48    #[serde(skip_serializing_if = "Option::is_none")]
49    pub description: Option<LogString>,
50    /// References to the source that the rule was derived from. These could be blog articles, technical papers, presentations or even tweets.
51    #[serde(skip_serializing_if = "Option::is_none")]
52    pub references: Option<Vec<LogString>>,
53    /// Declares the status of the rule:
54    /// - stable: the rule is considered as stable and may be used in production systems or dashboards.
55    /// - test: an almost stable rule that possibly could require some fine tuning.
56    /// - experimental: an experimental rule that could lead to false results or be noisy, but could also identify interesting events.
57    #[serde(skip_serializing_if = "Option::is_none")]
58    pub status: Option<LogString>,
59    /// License of the rule according the SPDX ID specification: https://spdx.org/ids
60    #[serde(skip_serializing_if = "Option::is_none")]
61    pub license: Option<LogString>,
62    /// Creator of the rule.
63    #[serde(skip_serializing_if = "Option::is_none")]
64    pub author: Option<LogString>,
65    #[serde(skip_serializing_if = "Option::is_none")]
66    pub date: Option<LogString>,
67    ///This section describes the log data on which the detection is meant to be applied to. It describes the log source, the platform, the application and the type that is required in detection.
68    ///
69    ///It consists of three attributes that are evaluated automatically by the converters and an arbitrary number of optional elements. We recommend using a "definition" value in cases in which further explication is necessary.
70    ///
71    /// - category - examples: firewall, web, antivirus
72    /// - product - examples: windows, apache, check point fw1
73    /// - service - examples: sshd, applocker
74    ///
75    ///The "category" value is used to select all log files written by a certain group of products, like firewalls or web server logs. The automatic conversion will use the keyword as a selector for multiple indices.
76    ///
77    ///The "product" value is used to select all log outputs of a certain product, e.g. all Windows Eventlog types including "Security", "System", "Application" and the new log types like "AppLocker" and "Windows Defender".
78    ///
79    ///Use the "service" value to select only a subset of a product's logs, like the "sshd" on Linux or the "Security" Eventlog on Windows systems.
80    ///
81    ///The "definition" can be used to describe the log source, including some information on the log verbosity level or configurations that have to be applied. It is not automatically evaluated by the converters but gives useful advice to readers on how to configure the source to provide the necessary events used in the detection.
82    ///
83    ///You can use the values of 'category, 'product' and 'service' to point the converters to a certain index. You could define in the configuration files that the category 'firewall' converts to ( index=fw1* OR index=asa* ) during Splunk search conversion or the product 'windows' converts to "_index":"logstash-windows*" in ElasticSearch queries.
84    pub logsource: Cow<'static, SigmaRuleLogSource>,
85    /// A set of search-identifiers that represent searches on log data
86    pub detection: Cow<'static, SigmaRuleDetection>,
87    /// A list of log fields that could be interesting in further analysis of the event and should be displayed to the analyst.
88    #[serde(skip_serializing_if = "Option::is_none")]
89    pub fields: Option<Vec<LogString>>,
90    #[serde(skip_serializing_if = "Option::is_none")]
91    pub falsepositives: Option<Vec<LogString>>,
92    #[serde(skip_serializing_if = "Option::is_none")]
93    pub level: Option<LogString>,
94    #[serde(skip_serializing_if = "Option::is_none")]
95    pub tags: Option<Vec<LogString>>,
96}
97
98#[derive(Clone, Default, Debug, Serialize, Deserialize)]
99pub struct SigmaRuleDetection {
100    #[serde(flatten)]
101    pub search_identifiers: BTreeMap<LogString, SigmaRuleCondition>,
102    pub condition: LogString,
103}
104
105#[derive(Clone, Default, Debug, Serialize, Deserialize)]
106#[serde(untagged)]
107pub enum SigmaRuleCondition {
108    Map(BTreeMap<LogString, SigmaValue>),
109    List(Vec<BTreeMap<LogString, SigmaValue>>),
110    #[default]
111    None,
112}
113
114impl From<SigmaRuleCondition> for SiemSubRule {
115    fn from(val: SigmaRuleCondition) -> Self {
116        match val {
117            SigmaRuleCondition::Map(condition_list) => {
118                let mut conditions = Vec::with_capacity(16);
119                for (field, value) in condition_list {
120                    conditions.push(parse_rule_condition(field, value));
121                }
122                SiemSubRule {
123                    conditions,
124                    rule_state: None,
125                }
126            }
127            SigmaRuleCondition::List(condition_list) => {
128                let mut conditions = Vec::with_capacity(16);
129                for condition in condition_list {
130                    for (field, value) in condition {
131                        conditions.push(parse_rule_condition(field, value))
132                    }
133                }
134                SiemSubRule {
135                    conditions,
136                    rule_state: None,
137                }
138            }
139            SigmaRuleCondition::None => SiemSubRule {
140                conditions: vec![],
141                rule_state: None,
142            },
143        }
144    }
145}
146
147fn parse_rule_condition(field: LogString, value: SigmaValue) -> RuleCondition {
148    let mut iter = field.split('|');
149    let field_name = iter.next().unwrap_or("");
150    let operator = iter.next();
151    let extra = iter.next();
152    if let Some(val) = operator {
153        RuleCondition {
154            field: Cow::Owned(field_name.to_string()),
155            operator: translate_operator(val, extra, value),
156        }
157    } else {
158        RuleCondition {
159            field: Cow::Owned(field_name.to_string()),
160            operator: translate_content_to_operator(value),
161        }
162    }
163}
164
165fn translate_content_to_operator(value: SigmaValue) -> RuleOperator {
166    match value {
167        SigmaValue::Text(v) => {
168            let starts = v.starts_with('*');
169            let ends = v.ends_with('*') && !v.ends_with("\\*");
170            if starts && ends && v.len() > 2 {
171                RuleOperator::Contains(v[1..v.len() - 2].to_string())
172            } else if starts {
173                RuleOperator::StartsWith(v[1..].to_string())
174            } else if ends && v.len() > 1 {
175                RuleOperator::StartsWith(v[..v.len() - 1].to_string())
176            } else {
177                RuleOperator::Equals(SiemField::Text(Cow::Owned(v.to_string())))
178            }
179        }
180        SigmaValue::Int(v) => RuleOperator::Equals(SiemField::I64(v)),
181        SigmaValue::Float(v) => RuleOperator::Equals(SiemField::F64(v)),
182        SigmaValue::Array(v) => RuleOperator::Any(
183            v.into_iter()
184                .map(|v| Box::new(RuleOperator::Equals(v.into())))
185                .collect(),
186        ),
187        SigmaValue::None => RuleOperator::IsNull(true),
188    }
189}
190
191fn translate_operator(operator: &str, extra: Option<&str>, value: SigmaValue) -> RuleOperator {
192    match operator {
193        "equals" => RuleOperator::Equals(value.into()),
194        "contains" => match value {
195            SigmaValue::Text(v) => RuleOperator::Contains(v.to_string()),
196            SigmaValue::Int(v) => RuleOperator::Contains(format!("{}", v)),
197            SigmaValue::Float(v) => RuleOperator::Contains(format!("{}", v)),
198            SigmaValue::Array(v) => {
199                if let Some(extra) = extra {
200                    if extra == "all" {
201                        return RuleOperator::All(
202                            v.iter()
203                                .map(|v| Box::new(RuleOperator::Contains(v.to_string())))
204                                .collect(),
205                        );
206                    }
207                }
208                RuleOperator::Any(
209                    v.iter()
210                        .map(|v| Box::new(RuleOperator::Contains(v.to_string())))
211                        .collect(),
212                )
213            }
214            SigmaValue::None => RuleOperator::Contains(String::new()),
215        },
216        "endswith" => RuleOperator::EndsWith(value.to_string()),
217        "startswith" => RuleOperator::StartsWith(value.to_string()),
218        _ => RuleOperator::All(vec![]),
219    }
220}
221
222impl Display for SigmaValue {
223    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
224        match self {
225            SigmaValue::Text(v) => f.write_str(v),
226            SigmaValue::Int(v) => f.write_fmt(format_args!("{}", v)),
227            SigmaValue::Float(v) => f.write_fmt(format_args!("{}", v)),
228            SigmaValue::Array(list) => {
229                f.write_str("[")?;
230                for value in list {
231                    f.write_fmt(format_args!("{},", value))?;
232                }
233                f.write_str("]")
234            }
235            SigmaValue::None => return Err(std::fmt::Error::default()),
236        }?;
237        Ok(())
238    }
239}
240
241impl From<SigmaValue> for SiemField {
242    fn from(val: SigmaValue) -> Self {
243        match val {
244            SigmaValue::Text(v) => SiemField::Text(v),
245            SigmaValue::Int(v) => SiemField::I64(v),
246            SigmaValue::Float(v) => SiemField::F64(v),
247            SigmaValue::Array(v) => {
248                SiemField::Array(v.iter().map(|v| LogString::Owned(v.to_string())).collect())
249            }
250            SigmaValue::None => todo!(),
251        }
252    }
253}
254
255#[derive(Clone, Default, Debug, Serialize, Deserialize)]
256pub struct SigmaRuleLogSource {
257    #[serde(skip_serializing_if = "Option::is_none")]
258    pub category: Option<LogString>,
259    #[serde(skip_serializing_if = "Option::is_none")]
260    pub product: Option<LogString>,
261    #[serde(skip_serializing_if = "Option::is_none")]
262    pub service: Option<LogString>,
263    #[serde(skip_serializing_if = "Option::is_none")]
264    pub definition: Option<LogString>,
265}
266
267#[derive(Clone, Default, Debug, Serialize, Deserialize)]
268#[serde(untagged)]
269pub enum SigmaValue {
270    Text(LogString),
271    Int(i64),
272    Float(f64),
273    Array(Vec<SigmaValue>),
274    #[default]
275    None,
276}
277
278impl From<SigmaRule> for SiemRule {
279    fn from(val: SigmaRule) -> Self {
280        let mut slf = val;
281        let subrules = parse_subrules(&mut slf);
282        let conditions = Vec::with_capacity(16);
283        let description = slf.description.unwrap_or_default();
284        let alert_content = transform_alert_content(&description);
285        SiemRule {
286            id: slf.id.unwrap_or_default(),
287            name: slf.title,
288            mitre: Cow::Owned(MitreInfo {
289                tactics: slf
290                    .tags
291                    .as_ref()
292                    .map(|v| {
293                        v.iter()
294                            .filter(|t| t.starts_with("attack."))
295                            .map(|t| MitreTactics::try_from(&t[7..]))
296                            .filter_map(|v| v.ok())
297                            .collect()
298                    })
299                    .unwrap_or_default(),
300                techniques: slf
301                    .tags
302                    .as_ref()
303                    .map(|v| {
304                        v.iter()
305                            .filter(|t| t.starts_with("attack."))
306                            .map(|t| MitreTechniques::try_from(&t[7..]))
307                            .filter_map(|v| v.ok())
308                            .collect()
309                    })
310                    .unwrap_or_default(),
311            }),
312            description,
313            needed_datasets: vec![],
314            subrules: Cow::Owned(subrules),
315            conditions: Cow::Owned(conditions),
316            alert: Cow::Owned(AlertGenerator {
317                content: alert_content,
318                severity: level_to_severity(&slf.level.unwrap_or_default()),
319                tags: slf.tags.unwrap_or_default(),
320                aggregation: None,
321            }),
322        }
323    }
324}
325
326fn parse_subrules(rule: &mut SigmaRule) -> BTreeMap<LogString, SiemSubRule> {
327    let mut ret = BTreeMap::new();
328    for (id, condition) in &rule.detection.search_identifiers {
329        ret.insert(Cow::Owned(id.to_string()), condition.clone().into());
330    }
331    ret
332}
333
334fn level_to_severity(level: &str) -> AlertSeverity {
335    match level {
336        "info" => AlertSeverity::INFORMATIONAL,
337        "informational" => AlertSeverity::INFORMATIONAL,
338        "INFORMATIONAL" => AlertSeverity::INFORMATIONAL,
339        "low" => AlertSeverity::LOW,
340        "LOW" => AlertSeverity::LOW,
341        "medium" => AlertSeverity::MEDIUM,
342        "MEDIUM" => AlertSeverity::MEDIUM,
343        "high" => AlertSeverity::HIGH,
344        "HIGH" => AlertSeverity::HIGH,
345        "critical" => AlertSeverity::CRITICAL,
346        "CRITICAL" => AlertSeverity::CRITICAL,
347        "critic" => AlertSeverity::CRITICAL,
348        "CRITIC" => AlertSeverity::CRITICAL,
349        _ => AlertSeverity::LOW,
350    }
351}
352
353fn transform_alert_content(description: &str) -> Vec<AlertContent> {
354    let mut to_return = Vec::with_capacity(16);
355    let mut l = SigmaDescriptionLexer::new(description.chars().collect());
356    l.read_char();
357    loop {
358        match l.next_token() {
359            Token::EOF => break,
360            Token::FIELD(field) => {
361                to_return.push(AlertContent::Field(field));
362            }
363            Token::Text(text) => to_return.push(AlertContent::Text(text)),
364        }
365    }
366    if to_return.is_empty() {
367        to_return.push(AlertContent::Text(Cow::Owned(description.to_string())));
368    }
369    to_return
370}
371
/// Lexer that splits a rule description into plain text and `$field`
/// references.
pub struct SigmaDescriptionLexer {
    /// Characters of the description being tokenized.
    input: Vec<char>,
    /// Index of the character currently held in `ch`.
    pub position: usize,
    /// Index of the next character `read_char` will load.
    pub read_position: usize,
    /// Current character; `'\0'` before the first read and past the end of the input.
    pub ch: char,
}
378
379impl SigmaDescriptionLexer {
380    pub fn new(input: Vec<char>) -> Self {
381        Self {
382            input,
383            position: 0,
384            read_position: 0,
385            ch: '\0',
386        }
387    }
388
389    pub fn read_char(&mut self) {
390        if self.read_position >= self.input.len() {
391            self.ch = '\0';
392        } else {
393            self.ch = self.input[self.read_position];
394        }
395        self.position = self.read_position;
396        self.read_position += 1;
397    }
398
399    pub fn skip_whitespace(&mut self) {
400        loop {
401            let ch = self.ch;
402            if ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' {
403                self.read_char();
404            } else {
405                return;
406            }
407        }
408    }
409
410    pub fn next_token(&mut self) -> Token {
411        let read_text = |l: &mut SigmaDescriptionLexer| -> Vec<char> {
412            let position = l.position;
413            while l.position < l.input.len() && l.ch != '$' {
414                l.read_char();
415            }
416            let dt = l.input[position..l.position].to_vec();
417            l.read_position -= 1;
418            dt
419        };
420
421        let read_field = |l: &mut SigmaDescriptionLexer| -> Vec<char> {
422            let mut to_ret = Vec::with_capacity(32);
423            while l.position < l.input.len() && l.ch != ' ' {
424                to_ret.push(l.ch);
425                l.read_char();
426            }
427            l.read_position -= 1;
428            to_ret
429        };
430
431        let tok: Token = match self.ch {
432            '$' => {
433                self.read_char();
434                let data = read_field(self);
435                Token::FIELD(Cow::Owned(data.iter().collect()))
436            }
437            '\0' => Token::EOF,
438            _ => {
439                let data = read_text(self);
440                Token::Text(Cow::Owned(data.iter().collect()))
441            }
442        };
443        self.read_char();
444        tok
445    }
446}
447
448#[derive(Debug, PartialEq)]
449pub enum Token {
450    Text(LogString),
451    FIELD(LogString),
452    EOF,
453}
454
/// The lexer must alternate text and field tokens and finish with `EOF`.
#[test]
fn should_translate_condition() {
    let description =
        "This is a basic description with source.ip=$source.ip and this a text $field123";
    let mut lexer = SigmaDescriptionLexer::new(description.chars().collect());
    lexer.read_char();
    let expected_tokens = [
        Token::Text(LogString::Borrowed(
            "This is a basic description with source.ip=",
        )),
        Token::FIELD(LogString::Borrowed("source.ip")),
        Token::Text(LogString::Borrowed(" and this a text ")),
        Token::FIELD(LogString::Borrowed("field123")),
        Token::EOF,
    ];
    for expected in expected_tokens {
        assert_eq!(expected, lexer.next_token());
    }
}
482
/// Every sample rule must parse from YAML and serialize back without errors.
#[test]
fn should_be_deserialized() {
    fn parse(raw: &str) -> SigmaRule {
        serde_yaml::from_str(raw).unwrap()
    }

    let simple = parse(include_str!("simple_sigma_rule.yml"));
    let _simple_yaml = serde_yaml::to_string(&simple).unwrap();

    let c2 = parse(include_str!("c2_sigma_rule.yml"));
    let _c2_json = serde_json::to_string_pretty(&c2).unwrap();

    let seven_zip = parse(include_str!("7zip_sigma_rule.yml"));
    let _seven_zip_yaml = serde_yaml::to_string(&seven_zip).unwrap();
}
495
/// The C2 sample rule must keep its MITRE technique, its description and the
/// IP lists of both selections once converted.
#[test]
fn should_transform_c2_sigma_to_siem_rule() {
    /// First condition of the named sub-rule.
    fn first_condition<'a>(rule: &'a SiemRule, subrule: &str) -> &'a RuleCondition {
        rule.subrules
            .get(subrule)
            .unwrap()
            .conditions
            .get(0)
            .unwrap()
    }

    let sigma: SigmaRule = serde_yaml::from_str(include_str!("c2_sigma_rule.yml")).unwrap();
    let siem_rule: SiemRule = sigma.into();

    assert_eq!(
        &MitreTechniques::T1041,
        siem_rule.mitre.techniques.get(0).unwrap()
    );
    assert_eq!(&AlertContent::Text(LogString::Borrowed("Detects communication to C2 servers mentioned in the operational notes of the ShadowBroker leak of EquationGroup C2 tools")), siem_rule.alert.content.get(0).unwrap());

    // Both selections list the same two addresses.
    let expected_ips = RuleOperator::Any(vec![
        Box::new(RuleOperator::Equals(SiemField::IP([69, 42, 98, 86].into()))),
        Box::new(RuleOperator::Equals(SiemField::IP(
            [89, 185, 234, 145].into(),
        ))),
    ]);

    let select_incoming = first_condition(&siem_rule, "select_incoming");
    assert_eq!("src_ip", select_incoming.field);
    assert_eq!(expected_ips, select_incoming.operator);

    let select_outgoing = first_condition(&siem_rule, "select_outgoing");
    assert_eq!("dst_ip", select_outgoing.field);
    assert_eq!(expected_ips, select_outgoing.operator);
}
541
/// The 7-Zip sample rule must translate its modifiers (`endswith`,
/// `contains`), plain values and `null` into the expected operators.
#[test]
fn should_transform_7zip_sigma_to_siem_rule() {
    /// Condition number `index` of the named sub-rule.
    fn condition<'a>(rule: &'a SiemRule, subrule: &str, index: usize) -> &'a RuleCondition {
        rule.subrules
            .get(subrule)
            .unwrap()
            .conditions
            .get(index)
            .unwrap()
    }

    let sigma: SigmaRule = serde_yaml::from_str(include_str!("7zip_sigma_rule.yml")).unwrap();
    let siem_rule: SiemRule = sigma.into();
    assert_eq!(&AlertContent::Text(LogString::Borrowed("7-Zip through 21.07 on Windows allows privilege escalation (CVE-2022-29072) and command execution when a file with the .7z extension is dragged to the Help>Contents area. This is caused by misconfiguration of 7z.dll and a heap overflow. The command runs in a child process under the 7zFM.exe process.")), siem_rule.alert.content.get(0).unwrap());

    let img_ends_with = condition(&siem_rule, "selection_img", 0);
    assert_eq!("Image", img_ends_with.field);
    assert_eq!(
        RuleOperator::EndsWith("\\cmd.exe".to_string()),
        img_ends_with.operator
    );

    let original_file_name = condition(&siem_rule, "selection_img", 1);
    assert_eq!("OriginalFileName", original_file_name.field);
    assert_eq!(
        RuleOperator::Equals("Cmd.Exe".into()),
        original_file_name.operator
    );

    let parent_image = condition(&siem_rule, "selection_parent", 0);
    assert_eq!("ParentImage", parent_image.field);
    assert_eq!(
        RuleOperator::EndsWith("\\7zFM.exe".to_string()),
        parent_image.operator
    );

    let bat_command_line = condition(&siem_rule, "filter_bat", 0);
    assert_eq!("CommandLine", bat_command_line.field);
    let expected_switches = RuleOperator::Any(
        [" /c ", " /k ", " /r "]
            .into_iter()
            .map(|switch| Box::new(RuleOperator::Contains(switch.to_string())))
            .collect(),
    );
    assert_eq!(expected_switches, bat_command_line.operator);

    let filter_null = condition(&siem_rule, "filter_null", 0);
    assert_eq!("CommandLine", filter_null.field);
    assert_eq!(RuleOperator::IsNull(true), filter_null.operator);
}