Skip to main content

rsigma_parser/
condition.rs

1//! Condition expression parser using pest PEG grammar + Pratt parser.
2//!
3//! Parses Sigma condition strings like:
4//! - `"selection and not filter"`
5//! - `"1 of selection_* and not 1 of filter_*"`
6//! - `"all of them"`
7//! - `"selection_main and 1 of selection_dword_* and not 1 of filter_optional_*"`
8//!
9//! Reference: pySigma conditions.py (uses pyparsing infix_notation)
10
11use pest::Parser;
12use pest::iterators::Pair;
13use pest::pratt_parser::{Assoc, Op, PrattParser};
14use pest_derive::Parser;
15
16use crate::ast::{ConditionExpr, Quantifier, SelectorPattern};
17use crate::error::{Result, SigmaParserError};
18
19// ---------------------------------------------------------------------------
20// Pest parser (generated from sigma.pest grammar)
21// ---------------------------------------------------------------------------
22
/// Parser type for Sigma condition strings.
///
/// The `Parser` derive (from `pest_derive`) generates the parsing code and the
/// `Rule` enum used throughout this module from the grammar file named in the
/// `grammar` attribute. The struct itself carries no state; it is only used as
/// `SigmaConditionParser::parse(Rule::condition, input)`.
#[derive(Parser)]
#[grammar = "src/sigma.pest"]
struct SigmaConditionParser;
26
27// ---------------------------------------------------------------------------
28// Public API
29// ---------------------------------------------------------------------------
30
31/// Parse a Sigma condition expression string into an AST.
32///
33/// # Examples
34///
35/// ```
36/// use rsigma_parser::condition::parse_condition;
37///
38/// let expr = parse_condition("selection and not filter").unwrap();
39/// println!("{expr}");
40/// ```
41pub fn parse_condition(input: &str) -> Result<ConditionExpr> {
42    let pairs = SigmaConditionParser::parse(Rule::condition, input)
43        .map_err(|e| SigmaParserError::Condition(e.to_string()))?;
44
45    let pratt = PrattParser::new()
46        .op(Op::infix(Rule::or_op, Assoc::Left))
47        .op(Op::infix(Rule::and_op, Assoc::Left))
48        .op(Op::prefix(Rule::not_op));
49
50    // condition = { SOI ~ expr ~ EOI }
51    let condition_pair = pairs
52        .into_iter()
53        .next()
54        .ok_or_else(|| SigmaParserError::Condition("empty condition expression".into()))?;
55    let expr_pair = condition_pair
56        .into_inner()
57        .find(|p| p.as_rule() == Rule::expr)
58        .ok_or_else(|| SigmaParserError::Condition("missing expr in condition".into()))?;
59
60    parse_expr(expr_pair, &pratt)
61}
62
63// ---------------------------------------------------------------------------
64// Internal parsing helpers
65// ---------------------------------------------------------------------------
66
67fn parse_expr(pair: Pair<'_, Rule>, pratt: &PrattParser<Rule>) -> Result<ConditionExpr> {
68    // The Pratt parser closures cannot return Result, so we capture the first
69    // error in a shared RefCell and propagate it after parsing completes.
70    let error: std::cell::RefCell<Option<String>> = std::cell::RefCell::new(None);
71
72    let result = pratt
73        .map_primary(|primary| match primary.as_rule() {
74            Rule::ident => ConditionExpr::Identifier(primary.as_str().to_string()),
75            Rule::selector => parse_selector(primary).unwrap_or_else(|e| {
76                if error.borrow().is_none() {
77                    *error.borrow_mut() = Some(e.to_string());
78                }
79                ConditionExpr::Identifier(String::new())
80            }),
81            Rule::expr => parse_expr(primary, pratt).unwrap_or_else(|e| {
82                if error.borrow().is_none() {
83                    *error.borrow_mut() = Some(e.to_string());
84                }
85                ConditionExpr::Identifier(String::new())
86            }),
87            other => {
88                if error.borrow().is_none() {
89                    *error.borrow_mut() = Some(format!("unexpected primary rule: {other:?}"));
90                }
91                ConditionExpr::Identifier(String::new())
92            }
93        })
94        .map_prefix(|op, rhs| match op.as_rule() {
95            Rule::not_op => ConditionExpr::Not(Box::new(rhs)),
96            other => {
97                if error.borrow().is_none() {
98                    *error.borrow_mut() = Some(format!("unexpected prefix rule: {other:?}"));
99                }
100                rhs
101            }
102        })
103        .map_infix(|lhs, op, rhs| match op.as_rule() {
104            Rule::and_op => merge_binary(ConditionExpr::And, lhs, rhs),
105            Rule::or_op => merge_binary(ConditionExpr::Or, lhs, rhs),
106            other => {
107                if error.borrow().is_none() {
108                    *error.borrow_mut() = Some(format!("unexpected infix rule: {other:?}"));
109                }
110                lhs
111            }
112        })
113        .parse(pair.into_inner());
114
115    if let Some(msg) = error.into_inner() {
116        return Err(SigmaParserError::Condition(msg));
117    }
118
119    Ok(result)
120}
121
122/// Flatten nested binary operators of the same kind.
123/// `a AND (b AND c)` → `AND(a, b, c)` instead of `AND(a, AND(b, c))`.
124fn merge_binary(
125    ctor: fn(Vec<ConditionExpr>) -> ConditionExpr,
126    lhs: ConditionExpr,
127    rhs: ConditionExpr,
128) -> ConditionExpr {
129    // Flatten same-type children to avoid unnecessary nesting: And(And(a, b), c) → And(a, b, c)
130    let is_and = matches!(ctor(vec![]), ConditionExpr::And(_));
131
132    let mut args = Vec::new();
133    for expr in [lhs, rhs] {
134        match expr {
135            ConditionExpr::And(children) if is_and => args.extend(children),
136            ConditionExpr::Or(children) if !is_and => args.extend(children),
137            other => args.push(other),
138        }
139    }
140
141    ctor(args)
142}
143
144fn parse_selector(pair: Pair<'_, Rule>) -> Result<ConditionExpr> {
145    // Iterate children, skipping the of_kw_inner pair (atomic rules can't be silent
146    // in pest, so of_kw_inner leaks into the parse tree)
147    let mut quantifier_pair = None;
148    let mut target_pair = None;
149
150    for p in pair.into_inner() {
151        match p.as_rule() {
152            Rule::quantifier => quantifier_pair = Some(p),
153            Rule::selector_target => target_pair = Some(p),
154            _ => {} // skip of_kw_inner
155        }
156    }
157
158    let quantifier = parse_quantifier(
159        quantifier_pair
160            .ok_or_else(|| SigmaParserError::Condition("selector missing quantifier".into()))?,
161    )?;
162    let pattern = parse_selector_target(
163        target_pair.ok_or_else(|| SigmaParserError::Condition("selector missing target".into()))?,
164    )?;
165
166    Ok(ConditionExpr::Selector {
167        quantifier,
168        pattern,
169    })
170}
171
172fn parse_quantifier(pair: Pair<'_, Rule>) -> Result<Quantifier> {
173    let inner = pair
174        .into_inner()
175        .next()
176        .ok_or_else(|| SigmaParserError::Condition("quantifier missing child".into()))?;
177    match inner.as_rule() {
178        Rule::all_kw => Ok(Quantifier::All),
179        Rule::any_kw => Ok(Quantifier::Any),
180        Rule::uint => {
181            let n: u64 = inner.as_str().parse().map_err(|e| {
182                SigmaParserError::Condition(format!("invalid quantifier number: {e}"))
183            })?;
184            if n == 1 {
185                Ok(Quantifier::Any)
186            } else {
187                Ok(Quantifier::Count(n))
188            }
189        }
190        other => Err(SigmaParserError::Condition(format!(
191            "unexpected quantifier rule: {other:?}"
192        ))),
193    }
194}
195
196fn parse_selector_target(pair: Pair<'_, Rule>) -> Result<SelectorPattern> {
197    let inner = pair
198        .into_inner()
199        .next()
200        .ok_or_else(|| SigmaParserError::Condition("selector target missing child".into()))?;
201    match inner.as_rule() {
202        Rule::them_kw => Ok(SelectorPattern::Them),
203        Rule::ident_pattern => Ok(SelectorPattern::Pattern(inner.as_str().to_string())),
204        other => Err(SigmaParserError::Condition(format!(
205            "unexpected selector target rule: {other:?}"
206        ))),
207    }
208}
209
210// =============================================================================
211// Tests
212// =============================================================================
213
#[cfg(test)]
mod tests {
    use super::*;

    // ---- builders for expected trees ------------------------------------

    /// `ConditionExpr::Identifier` from a string slice.
    fn ident(name: &str) -> ConditionExpr {
        ConditionExpr::Identifier(name.to_string())
    }

    /// `ConditionExpr::Not` around `inner`.
    fn negated(inner: ConditionExpr) -> ConditionExpr {
        ConditionExpr::Not(Box::new(inner))
    }

    /// Selector over an identifier pattern such as `selection_*`.
    fn of_pattern(quantifier: Quantifier, glob: &str) -> ConditionExpr {
        ConditionExpr::Selector {
            quantifier,
            pattern: SelectorPattern::Pattern(glob.to_string()),
        }
    }

    /// Selector over `them`.
    fn of_them(quantifier: Quantifier) -> ConditionExpr {
        ConditionExpr::Selector {
            quantifier,
            pattern: SelectorPattern::Them,
        }
    }

    /// Parse `input` (panicking on failure) and compare it with `expected`.
    #[track_caller]
    fn assert_parses_to(input: &str, expected: ConditionExpr) {
        let expr = parse_condition(input).unwrap();
        assert_eq!(expr, expected);
    }

    // ---- identifiers and basic operators --------------------------------

    #[test]
    fn test_simple_identifier() {
        assert_parses_to("selection", ident("selection"));
    }

    #[test]
    fn test_and() {
        assert_parses_to(
            "selection and filter",
            ConditionExpr::And(vec![ident("selection"), ident("filter")]),
        );
    }

    #[test]
    fn test_or() {
        assert_parses_to(
            "selection1 or selection2",
            ConditionExpr::Or(vec![ident("selection1"), ident("selection2")]),
        );
    }

    #[test]
    fn test_not() {
        assert_parses_to("not filter", negated(ident("filter")));
    }

    #[test]
    fn test_and_not() {
        assert_parses_to(
            "selection and not filter",
            ConditionExpr::And(vec![ident("selection"), negated(ident("filter"))]),
        );
    }

    // ---- precedence and grouping ----------------------------------------

    #[test]
    fn test_precedence_not_and_or() {
        // Expected reading: a or ((not b) and c)
        assert_parses_to(
            "a or not b and c",
            ConditionExpr::Or(vec![
                ident("a"),
                ConditionExpr::And(vec![negated(ident("b")), ident("c")]),
            ]),
        );
    }

    #[test]
    fn test_parentheses() {
        assert_parses_to(
            "(a or b) and c",
            ConditionExpr::And(vec![
                ConditionExpr::Or(vec![ident("a"), ident("b")]),
                ident("c"),
            ]),
        );
    }

    // ---- selectors ------------------------------------------------------

    #[test]
    fn test_selector_1_of_pattern() {
        assert_parses_to(
            "1 of selection_*",
            of_pattern(Quantifier::Any, "selection_*"),
        );
    }

    #[test]
    fn test_selector_all_of_them() {
        assert_parses_to("all of them", of_them(Quantifier::All));
    }

    #[test]
    fn test_selector_any_of() {
        assert_parses_to(
            "any of selection*",
            of_pattern(Quantifier::Any, "selection*"),
        );
    }

    #[test]
    fn test_complex_condition() {
        // Taken from a real rule; the chained `and`s end up in one flat node.
        assert_parses_to(
            "selection_main and 1 of selection_dword_* and not 1 of filter_optional_*",
            ConditionExpr::And(vec![
                ident("selection_main"),
                of_pattern(Quantifier::Any, "selection_dword_*"),
                negated(of_pattern(Quantifier::Any, "filter_optional_*")),
            ]),
        );
    }

    // ---- identifier lexing ----------------------------------------------

    #[test]
    fn test_identifier_with_keyword_substring() {
        // A keyword embedded in a longer name must not split the identifier.
        assert_parses_to("selection_and_filter", ident("selection_and_filter"));
    }

    #[test]
    fn test_identifier_with_hyphen() {
        assert_parses_to(
            "my-selection and my-filter",
            ConditionExpr::And(vec![ident("my-selection"), ident("my-filter")]),
        );
    }

    // ---- flattening of chained operators --------------------------------

    #[test]
    fn test_triple_and_flattened() {
        assert_parses_to(
            "a and b and c",
            ConditionExpr::And(vec![ident("a"), ident("b"), ident("c")]),
        );
    }

    #[test]
    fn test_triple_or_flattened() {
        assert_parses_to(
            "a or b or c",
            ConditionExpr::Or(vec![ident("a"), ident("b"), ident("c")]),
        );
    }

    // ---- combinations seen in real rules --------------------------------

    #[test]
    fn test_all_of_selection_and_not_filter() {
        assert_parses_to(
            "all of selection_powershell_* or all of selection_wmic_*",
            ConditionExpr::Or(vec![
                of_pattern(Quantifier::All, "selection_powershell_*"),
                of_pattern(Quantifier::All, "selection_wmic_*"),
            ]),
        );
    }

    #[test]
    fn test_real_world_complex() {
        assert_parses_to(
            "selection_key and (all of selection_powershell_* or all of selection_wmic_*)",
            ConditionExpr::And(vec![
                ident("selection_key"),
                ConditionExpr::Or(vec![
                    of_pattern(Quantifier::All, "selection_powershell_*"),
                    of_pattern(Quantifier::All, "selection_wmic_*"),
                ]),
            ]),
        );
    }

    #[test]
    fn test_1_of_them() {
        assert_parses_to("1 of them", of_them(Quantifier::Any));
    }

    #[test]
    fn test_count_of() {
        assert_parses_to(
            "3 of selection_*",
            of_pattern(Quantifier::Count(3), "selection_*"),
        );
    }

    #[test]
    fn test_not_1_of_filter() {
        assert_parses_to(
            "selection and not 1 of filter*",
            ConditionExpr::And(vec![
                ident("selection"),
                negated(of_pattern(Quantifier::Any, "filter*")),
            ]),
        );
    }
}