sigma_rust/detection/
ast.rs

1use crate::detection::lexer::{Lexer, Token};
2use crate::error::ParserError;
3use crate::wildcard::WildcardToken;
4use std::collections::HashSet;
5use std::fmt;
6
7enum PrefixOperator {
8    Not,
9}
10
11impl PrefixOperator {
12    fn binding_power(&self) -> u8 {
13        match self {
14            Self::Not => 3,
15        }
16    }
17}
18
19enum InfixOperator {
20    And,
21    Or,
22}
23
24impl InfixOperator {
25    fn binding_power(&self) -> u8 {
26        match self {
27            Self::And => 2,
28            Self::Or => 1,
29        }
30    }
31}
32
33#[derive(Debug)]
34pub(crate) enum Ast {
35    Selection(String),
36    OneOf(Vec<WildcardToken>),
37    OneOfThem,
38    AllOf(Vec<WildcardToken>),
39    AllOfThem,
40    Not(Box<Ast>),
41    And(Box<Ast>, Box<Ast>),
42    Or(Box<Ast>, Box<Ast>),
43}
44
45impl Default for Ast {
46    fn default() -> Self {
47        Self::Selection("".to_string())
48    }
49}
50
51impl Ast {
52    pub(crate) fn new(input: &str) -> Result<Self, ParserError> {
53        let mut lexer = Lexer::new(input);
54        Self::parse_token_stream(&mut lexer, 0)
55    }
56
57    fn tokenize_selection_string(selection: String) -> Vec<WildcardToken> {
58        let mut result: Vec<WildcardToken> = vec![];
59        let mut buffer: Vec<char> = vec![];
60
61        for c in selection.chars() {
62            if c == '*' {
63                if !buffer.is_empty() {
64                    result.push(WildcardToken::Pattern(buffer.clone()));
65                    buffer.clear();
66                }
67                if !matches!(result.last(), Some(WildcardToken::Star)) {
68                    result.push(WildcardToken::Star);
69                }
70            } else {
71                buffer.push(c);
72            }
73        }
74        if !buffer.is_empty() {
75            result.push(WildcardToken::Pattern(buffer.clone()));
76        }
77        result
78    }
79
80    fn parse_token_stream(lexer: &mut Lexer, min_binding_power: u8) -> Result<Self, ParserError> {
81        let mut left = match lexer.next() {
82            Token::Selection(s) => Self::Selection(s),
83            Token::OneOf(s) => Self::OneOf(Self::tokenize_selection_string(s)),
84            Token::OneOfThem => Self::OneOfThem,
85            Token::AllOf(s) => Self::AllOf(Self::tokenize_selection_string(s)),
86            Token::AllOfThem => Self::AllOfThem,
87            Token::OpeningParenthesis => {
88                let left = Self::parse_token_stream(lexer, 0)?;
89                if lexer.next() != Token::ClosingParenthesis {
90                    return Err(ParserError::MissingClosingParenthesis());
91                }
92                left
93            }
94            Token::Not => {
95                let right = Self::parse_token_stream(lexer, PrefixOperator::Not.binding_power())?;
96                Self::Not(Box::new(right))
97            }
98            t => return Err(ParserError::UnexpectedToken(t.to_string())),
99        };
100
101        loop {
102            let operator = match lexer.peek() {
103                Token::End | Token::ClosingParenthesis => break,
104                Token::And => InfixOperator::And,
105                Token::Or => InfixOperator::Or,
106                t => return Err(ParserError::InvalidOperator(t.to_string())),
107            };
108
109            let bp = operator.binding_power();
110            if bp < min_binding_power {
111                break;
112            }
113            lexer.next();
114
115            left = {
116                let right = Self::parse_token_stream(lexer, bp)?;
117                match operator {
118                    InfixOperator::And => Self::And(Box::new(left), Box::new(right)),
119                    InfixOperator::Or => Self::Or(Box::new(left), Box::new(right)),
120                }
121            };
122        }
123
124        Ok(left)
125    }
126
127    pub(crate) fn selections(&self) -> HashSet<&str> {
128        let mut result: HashSet<&str> = HashSet::new();
129        Self::selections_recursive(self, &mut result);
130        result
131    }
132
133    fn selections_recursive<'a>(current: &'a Self, acc: &mut HashSet<&'a str>) {
134        match current {
135            Self::Selection(s) => _ = acc.insert(s),
136            Self::Not(s) => Self::selections_recursive(s, acc),
137            Self::Or(left, right) | Self::And(left, right) => {
138                Self::selections_recursive(left, acc);
139                Self::selections_recursive(right, acc);
140            }
141            Self::OneOf(_) | Self::OneOfThem | Self::AllOf(_) | Self::AllOfThem => {}
142        }
143    }
144}
145
146impl fmt::Display for Ast {
147    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
148        match self {
149            Self::Selection(s) => write!(f, "{}", s),
150            Self::OneOf(s) => write!(f, "1 of {:?}", s),
151            Self::OneOfThem => write!(f, "1 of them"),
152            Self::AllOf(s) => write!(f, "all of {:?}", s),
153            Self::AllOfThem => write!(f, "all of them"),
154            Self::Not(a) => write!(f, "not ({})", a),
155            Self::And(a, b) => write!(f, "({} and {})", a, b),
156            Self::Or(a, b) => write!(f, "({} or {})", a, b),
157        }
158    }
159}
160
161#[cfg(test)]
162mod tests {
163    use super::*;
164
165    #[test]
166    fn test_parse_simple_expression() {
167        let ast = Ast::new("selection_1 and selection_2").unwrap();
168        assert_eq!(ast.to_string(), "(selection_1 and selection_2)");
169    }
170
171    #[test]
172    fn test_parse_binding_power() {
173        let ast = Ast::new("x or y and z").unwrap();
174        assert_eq!(ast.to_string(), "(x or (y and z))");
175    }
176
177    #[test]
178    fn test_parse_all() {
179        let ast = Ast::new("x or 1 of them and all of y* ").unwrap();
180        assert_eq!(
181            ast.to_string(),
182            "(x or (1 of them and all of [Pattern(['y']), Star]))"
183        );
184
185        let ast = Ast::new("x or 1 of them and all of y** ").unwrap();
186        assert_eq!(
187            ast.to_string(),
188            "(x or (1 of them and all of [Pattern(['y']), Star]))"
189        );
190
191        let ast = Ast::new("x or 1 of them and all of y**z ").unwrap();
192        assert_eq!(
193            ast.to_string(),
194            "(x or (1 of them and all of [Pattern(['y']), Star, Pattern(['z'])]))"
195        );
196    }
197
198    #[test]
199    fn test_parse_parentheses() {
200        let ast = Ast::new("x or y and z").unwrap();
201        assert_eq!(ast.to_string(), "(x or (y and z))");
202
203        let ast = Ast::new("( x or y ) and z)").unwrap();
204        assert_eq!(ast.to_string(), "((x or y) and z)");
205    }
206
207    #[test]
208    fn test_not() {
209        let ast = Ast::new("a and not b or not not c").unwrap();
210        assert_eq!(ast.to_string(), "((a and not (b)) or not (not (c)))");
211    }
212
213    #[test]
214    fn test_mismatching_parentheses() {
215        let err = Ast::new("x and ( y or z ").unwrap_err();
216        assert!(matches!(err, ParserError::MissingClosingParenthesis()));
217    }
218
219    #[test]
220    fn test_get_identifiers() {
221        let ast = Ast::new("x1 and x2 or x3 and 1 of x4* or all of x5* or x1").unwrap();
222        let identifiers = ast.selections();
223        assert_eq!(identifiers, HashSet::from(["x1", "x2", "x3"]));
224    }
225
226    #[test]
227    fn test_selections_without_logical_operator() {
228        let err =
229            Ast::new(" write TargetLogonId from selection1 (if not selection2) ").unwrap_err();
230        assert!(matches!(err, ParserError::InvalidOperator(ref a) if a == "TargetLogonId"));
231    }
232}