Skip to main content

rsigma_parser/
condition.rs

//! Condition expression parser built on a pest PEG grammar and a Pratt parser.
//!
//! Parses Sigma condition strings like:
//! - `"selection and not filter"`
//! - `"1 of selection_* and not 1 of filter_*"`
//! - `"all of them"`
//! - `"selection_main and 1 of selection_dword_* and not 1 of filter_optional_*"`
//!
//! Reference: pySigma conditions.py (uses pyparsing infix_notation)
10
11use pest::Parser;
12use pest::iterators::Pair;
13use pest::pratt_parser::{Assoc, Op, PrattParser};
14use pest_derive::Parser;
15
16use crate::ast::{ConditionExpr, Quantifier, SelectorPattern};
17use crate::error::{Result, SigmaParserError, SourceLocation};
18
19// ---------------------------------------------------------------------------
20// Pest parser (generated from sigma.pest grammar)
21// ---------------------------------------------------------------------------
22
23#[derive(Parser)]
24#[grammar = "src/sigma.pest"]
25struct SigmaConditionParser;
26
27// ---------------------------------------------------------------------------
28// Public API
29// ---------------------------------------------------------------------------
30
31/// Parse a Sigma condition expression string into an AST.
32///
33/// # Examples
34///
35/// ```
36/// use rsigma_parser::condition::parse_condition;
37///
38/// let expr = parse_condition("selection and not filter").unwrap();
39/// println!("{expr}");
40/// ```
41pub fn parse_condition(input: &str) -> Result<ConditionExpr> {
42    let pairs = SigmaConditionParser::parse(Rule::condition, input).map_err(|e| {
43        let loc = extract_pest_location(&e);
44        SigmaParserError::Condition(e.to_string(), loc)
45    })?;
46
47    let pratt = PrattParser::new()
48        .op(Op::infix(Rule::or_op, Assoc::Left))
49        .op(Op::infix(Rule::and_op, Assoc::Left))
50        .op(Op::prefix(Rule::not_op));
51
52    // condition = { SOI ~ expr ~ EOI }
53    let condition_pair = pairs
54        .into_iter()
55        .next()
56        .ok_or_else(|| SigmaParserError::Condition("empty condition expression".into(), None))?;
57    let expr_pair = condition_pair
58        .into_inner()
59        .find(|p| p.as_rule() == Rule::expr)
60        .ok_or_else(|| SigmaParserError::Condition("missing expr in condition".into(), None))?;
61
62    parse_expr(expr_pair, &pratt)
63}
64
65fn extract_pest_location(err: &pest::error::Error<Rule>) -> Option<SourceLocation> {
66    match err.line_col {
67        pest::error::LineColLocation::Pos((line, col)) => Some(SourceLocation {
68            line: line as u32,
69            col: col as u32,
70        }),
71        pest::error::LineColLocation::Span((line, col), _) => Some(SourceLocation {
72            line: line as u32,
73            col: col as u32,
74        }),
75    }
76}
77
78// ---------------------------------------------------------------------------
79// Internal parsing helpers
80// ---------------------------------------------------------------------------
81
82/// An error collected during Pratt parsing, with optional position context.
83struct PrattError {
84    message: String,
85    location: Option<SourceLocation>,
86}
87
88fn location_from_pair(pair: &Pair<'_, Rule>) -> Option<SourceLocation> {
89    let (line, col) = pair.as_span().start_pos().line_col();
90    Some(SourceLocation {
91        line: line as u32,
92        col: col as u32,
93    })
94}
95
96fn parse_expr(pair: Pair<'_, Rule>, pratt: &PrattParser<Rule>) -> Result<ConditionExpr> {
97    // The Pratt parser closures cannot return Result, so we collect all
98    // errors in a shared RefCell and report them after parsing completes.
99    let errors: std::cell::RefCell<Vec<PrattError>> = std::cell::RefCell::new(Vec::new());
100
101    let result = pratt
102        .map_primary(|primary| {
103            let loc = location_from_pair(&primary);
104            match primary.as_rule() {
105                Rule::ident => ConditionExpr::Identifier(primary.as_str().to_string()),
106                Rule::selector => parse_selector(primary).unwrap_or_else(|e| {
107                    errors.borrow_mut().push(PrattError {
108                        message: e.to_string(),
109                        location: e.location().or(loc),
110                    });
111                    ConditionExpr::Identifier(String::new())
112                }),
113                Rule::expr => parse_expr(primary, pratt).unwrap_or_else(|e| {
114                    errors.borrow_mut().push(PrattError {
115                        message: e.to_string(),
116                        location: e.location().or(loc),
117                    });
118                    ConditionExpr::Identifier(String::new())
119                }),
120                other => {
121                    errors.borrow_mut().push(PrattError {
122                        message: format!("unexpected primary rule: {other:?}"),
123                        location: loc,
124                    });
125                    ConditionExpr::Identifier(String::new())
126                }
127            }
128        })
129        .map_prefix(|op, rhs| {
130            let loc = location_from_pair(&op);
131            match op.as_rule() {
132                Rule::not_op => ConditionExpr::Not(Box::new(rhs)),
133                other => {
134                    errors.borrow_mut().push(PrattError {
135                        message: format!("unexpected prefix rule: {other:?}"),
136                        location: loc,
137                    });
138                    rhs
139                }
140            }
141        })
142        .map_infix(|lhs, op, rhs| {
143            let loc = location_from_pair(&op);
144            match op.as_rule() {
145                Rule::and_op => merge_binary(ConditionExpr::And, lhs, rhs),
146                Rule::or_op => merge_binary(ConditionExpr::Or, lhs, rhs),
147                other => {
148                    errors.borrow_mut().push(PrattError {
149                        message: format!("unexpected infix rule: {other:?}"),
150                        location: loc,
151                    });
152                    lhs
153                }
154            }
155        })
156        .parse(pair.into_inner());
157
158    let collected = errors.into_inner();
159    if !collected.is_empty() {
160        let combined = collected
161            .iter()
162            .map(|e| match &e.location {
163                Some(loc) => format!("at {loc}: {}", e.message),
164                None => e.message.clone(),
165            })
166            .collect::<Vec<_>>()
167            .join("; ");
168        let first_loc = collected.iter().find_map(|e| e.location);
169        return Err(SigmaParserError::Condition(combined, first_loc));
170    }
171
172    Ok(result)
173}
174
175/// Flatten nested binary operators of the same kind.
176/// `a AND (b AND c)` → `AND(a, b, c)` instead of `AND(a, AND(b, c))`.
177fn merge_binary(
178    ctor: fn(Vec<ConditionExpr>) -> ConditionExpr,
179    lhs: ConditionExpr,
180    rhs: ConditionExpr,
181) -> ConditionExpr {
182    // Flatten same-type children to avoid unnecessary nesting: And(And(a, b), c) → And(a, b, c)
183    let is_and = matches!(ctor(vec![]), ConditionExpr::And(_));
184
185    let mut args = Vec::new();
186    for expr in [lhs, rhs] {
187        match expr {
188            ConditionExpr::And(children) if is_and => args.extend(children),
189            ConditionExpr::Or(children) if !is_and => args.extend(children),
190            other => args.push(other),
191        }
192    }
193
194    ctor(args)
195}
196
197fn parse_selector(pair: Pair<'_, Rule>) -> Result<ConditionExpr> {
198    // Iterate children, skipping the of_kw_inner pair (atomic rules can't be silent
199    // in pest, so of_kw_inner leaks into the parse tree)
200    let mut quantifier_pair = None;
201    let mut target_pair = None;
202
203    for p in pair.into_inner() {
204        match p.as_rule() {
205            Rule::quantifier => quantifier_pair = Some(p),
206            Rule::selector_target => target_pair = Some(p),
207            _ => {} // skip of_kw_inner
208        }
209    }
210
211    let quantifier =
212        parse_quantifier(quantifier_pair.ok_or_else(|| {
213            SigmaParserError::Condition("selector missing quantifier".into(), None)
214        })?)?;
215    let pattern = parse_selector_target(
216        target_pair
217            .ok_or_else(|| SigmaParserError::Condition("selector missing target".into(), None))?,
218    )?;
219
220    Ok(ConditionExpr::Selector {
221        quantifier,
222        pattern,
223    })
224}
225
226fn parse_quantifier(pair: Pair<'_, Rule>) -> Result<Quantifier> {
227    let inner = pair
228        .into_inner()
229        .next()
230        .ok_or_else(|| SigmaParserError::Condition("quantifier missing child".into(), None))?;
231    match inner.as_rule() {
232        Rule::all_kw => Ok(Quantifier::All),
233        Rule::any_kw => Ok(Quantifier::Any),
234        Rule::uint => {
235            let n: u64 = inner.as_str().parse().map_err(|e| {
236                SigmaParserError::Condition(format!("invalid quantifier number: {e}"), None)
237            })?;
238            if n == 1 {
239                Ok(Quantifier::Any)
240            } else {
241                Ok(Quantifier::Count(n))
242            }
243        }
244        other => Err(SigmaParserError::Condition(
245            format!("unexpected quantifier rule: {other:?}"),
246            None,
247        )),
248    }
249}
250
251fn parse_selector_target(pair: Pair<'_, Rule>) -> Result<SelectorPattern> {
252    let inner = pair
253        .into_inner()
254        .next()
255        .ok_or_else(|| SigmaParserError::Condition("selector target missing child".into(), None))?;
256    match inner.as_rule() {
257        Rule::them_kw => Ok(SelectorPattern::Them),
258        Rule::ident_pattern => Ok(SelectorPattern::Pattern(inner.as_str().to_string())),
259        other => Err(SigmaParserError::Condition(
260            format!("unexpected selector target rule: {other:?}"),
261            None,
262        )),
263    }
264}
265
266// =============================================================================
267// Tests
268// =============================================================================
269
#[cfg(test)]
mod tests {
    use super::*;

    // ── Helpers for building expected ASTs ───────────────────────────────

    /// Parse `input`, panicking (and failing the test) if it is rejected.
    #[track_caller]
    fn parsed(input: &str) -> ConditionExpr {
        parse_condition(input).unwrap()
    }

    /// Assert that `input` is rejected with a `Condition` error.
    #[track_caller]
    fn assert_rejected(input: &str) {
        let err = parse_condition(input).unwrap_err();
        assert!(matches!(err, SigmaParserError::Condition(_, _)));
    }

    /// Expected node for a plain identifier.
    fn ident(name: &str) -> ConditionExpr {
        ConditionExpr::Identifier(name.to_string())
    }

    /// Expected node for a negation.
    fn not(inner: ConditionExpr) -> ConditionExpr {
        ConditionExpr::Not(Box::new(inner))
    }

    /// Expected node for `<quantifier> of <pattern>`.
    fn of(quantifier: Quantifier, pattern: &str) -> ConditionExpr {
        ConditionExpr::Selector {
            quantifier,
            pattern: SelectorPattern::Pattern(pattern.to_string()),
        }
    }

    /// Expected node for `<quantifier> of them`.
    fn of_them(quantifier: Quantifier) -> ConditionExpr {
        ConditionExpr::Selector {
            quantifier,
            pattern: SelectorPattern::Them,
        }
    }

    // ── Well-formed conditions ───────────────────────────────────────────

    #[test]
    fn test_simple_identifier() {
        assert_eq!(parsed("selection"), ident("selection"));
    }

    #[test]
    fn test_and() {
        assert_eq!(
            parsed("selection and filter"),
            ConditionExpr::And(vec![ident("selection"), ident("filter")])
        );
    }

    #[test]
    fn test_or() {
        assert_eq!(
            parsed("selection1 or selection2"),
            ConditionExpr::Or(vec![ident("selection1"), ident("selection2")])
        );
    }

    #[test]
    fn test_not() {
        assert_eq!(parsed("not filter"), not(ident("filter")));
    }

    #[test]
    fn test_and_not() {
        assert_eq!(
            parsed("selection and not filter"),
            ConditionExpr::And(vec![ident("selection"), not(ident("filter"))])
        );
    }

    #[test]
    fn test_precedence_not_and_or() {
        // "a or not b and c" should parse as "a or ((not b) and c)"
        assert_eq!(
            parsed("a or not b and c"),
            ConditionExpr::Or(vec![
                ident("a"),
                ConditionExpr::And(vec![not(ident("b")), ident("c")]),
            ])
        );
    }

    #[test]
    fn test_parentheses() {
        assert_eq!(
            parsed("(a or b) and c"),
            ConditionExpr::And(vec![
                ConditionExpr::Or(vec![ident("a"), ident("b")]),
                ident("c"),
            ])
        );
    }

    #[test]
    fn test_selector_1_of_pattern() {
        assert_eq!(
            parsed("1 of selection_*"),
            of(Quantifier::Any, "selection_*")
        );
    }

    #[test]
    fn test_selector_all_of_them() {
        assert_eq!(parsed("all of them"), of_them(Quantifier::All));
    }

    #[test]
    fn test_selector_any_of() {
        assert_eq!(
            parsed("any of selection*"),
            of(Quantifier::Any, "selection*")
        );
    }

    #[test]
    fn test_complex_condition() {
        // Real-world: selection_main and 1 of selection_dword_* and not 1 of filter_optional_*
        assert_eq!(
            parsed("selection_main and 1 of selection_dword_* and not 1 of filter_optional_*"),
            ConditionExpr::And(vec![
                ident("selection_main"),
                of(Quantifier::Any, "selection_dword_*"),
                not(of(Quantifier::Any, "filter_optional_*")),
            ])
        );
    }

    #[test]
    fn test_identifier_with_keyword_substring() {
        // "and_filter" should be parsed as an identifier, not "and" + "filter"
        assert_eq!(
            parsed("selection_and_filter"),
            ident("selection_and_filter")
        );
    }

    #[test]
    fn test_identifier_with_hyphen() {
        assert_eq!(
            parsed("my-selection and my-filter"),
            ConditionExpr::And(vec![ident("my-selection"), ident("my-filter")])
        );
    }

    #[test]
    fn test_triple_and_flattened() {
        assert_eq!(
            parsed("a and b and c"),
            ConditionExpr::And(vec![ident("a"), ident("b"), ident("c")])
        );
    }

    #[test]
    fn test_triple_or_flattened() {
        assert_eq!(
            parsed("a or b or c"),
            ConditionExpr::Or(vec![ident("a"), ident("b"), ident("c")])
        );
    }

    #[test]
    fn test_all_of_selection_and_not_filter() {
        assert_eq!(
            parsed("all of selection_powershell_* or all of selection_wmic_*"),
            ConditionExpr::Or(vec![
                of(Quantifier::All, "selection_powershell_*"),
                of(Quantifier::All, "selection_wmic_*"),
            ])
        );
    }

    #[test]
    fn test_real_world_complex() {
        // From rules: selection_key and (all of selection_powershell_* or all of selection_wmic_*)
        assert_eq!(
            parsed("selection_key and (all of selection_powershell_* or all of selection_wmic_*)"),
            ConditionExpr::And(vec![
                ident("selection_key"),
                ConditionExpr::Or(vec![
                    of(Quantifier::All, "selection_powershell_*"),
                    of(Quantifier::All, "selection_wmic_*"),
                ]),
            ])
        );
    }

    #[test]
    fn test_1_of_them() {
        assert_eq!(parsed("1 of them"), of_them(Quantifier::Any));
    }

    #[test]
    fn test_count_of() {
        assert_eq!(
            parsed("3 of selection_*"),
            of(Quantifier::Count(3), "selection_*")
        );
    }

    #[test]
    fn test_not_1_of_filter() {
        assert_eq!(
            parsed("selection and not 1 of filter*"),
            ConditionExpr::And(vec![
                ident("selection"),
                not(of(Quantifier::Any, "filter*")),
            ])
        );
    }

    // ── Multi-wildcard selector pattern tests ──────────────────────────────

    #[test]
    fn test_selector_multi_wildcard_pattern() {
        assert_eq!(
            parsed("1 of selection_*_*"),
            of(Quantifier::Any, "selection_*_*")
        );
    }

    #[test]
    fn test_selector_leading_wildcard_pattern() {
        assert_eq!(
            parsed("all of *_selection_*"),
            of(Quantifier::All, "*_selection_*")
        );
    }

    #[test]
    fn test_selector_bare_wildcard() {
        assert_eq!(parsed("1 of *"), of(Quantifier::Any, "*"));
    }

    #[test]
    fn test_selector_triple_wildcard_segment() {
        assert_eq!(
            parsed("any of sel_*_*_*"),
            of(Quantifier::Any, "sel_*_*_*")
        );
    }

    #[test]
    fn test_multi_wildcard_in_complex_condition() {
        assert_eq!(
            parsed("selection_main and 1 of sel_*_* and not 1 of filter_*_*"),
            ConditionExpr::And(vec![
                ident("selection_main"),
                of(Quantifier::Any, "sel_*_*"),
                not(of(Quantifier::Any, "filter_*_*")),
            ])
        );
    }

    #[test]
    fn test_selector_wildcard_only_prefix() {
        assert_eq!(parsed("all of *suffix"), of(Quantifier::All, "*suffix"));
    }

    // ── Malformed condition expression tests ─────────────────────────────

    #[test]
    fn test_empty_string_fails() {
        assert_rejected("");
    }

    #[test]
    fn test_whitespace_only_fails() {
        assert_rejected("   ");
    }

    #[test]
    fn test_trailing_operator_fails() {
        assert_rejected("selection and");
    }

    #[test]
    fn test_leading_operator_fails() {
        assert_rejected("and selection");
    }

    #[test]
    fn test_double_operator_fails() {
        assert_rejected("selection and and filter");
    }

    #[test]
    fn test_unbalanced_open_paren_fails() {
        assert_rejected("(selection and filter");
    }

    #[test]
    fn test_unbalanced_close_paren_fails() {
        assert_rejected("selection and filter)");
    }

    #[test]
    fn test_empty_parens_fails() {
        assert_rejected("()");
    }

    #[test]
    fn test_only_operator_fails() {
        assert_rejected("and");
    }

    #[test]
    fn test_only_not_fails() {
        assert_rejected("not");
    }

    #[test]
    fn test_or_alone_fails() {
        assert_rejected("or");
    }

    #[test]
    fn test_incomplete_selector_missing_target_fails() {
        assert_rejected("1 of");
    }

    #[test]
    fn test_of_without_quantifier_fails() {
        assert_rejected("of selection_*");
    }

    #[test]
    fn test_pest_error_carries_location() {
        match parse_condition("selection and") {
            Err(SigmaParserError::Condition(_, loc)) => {
                assert!(
                    loc.is_some(),
                    "pest parse errors should carry source location"
                );
            }
            _ => panic!("Expected Condition error"),
        }
    }

    #[test]
    fn test_invalid_characters_fails() {
        assert_rejected("selection @ filter");
    }

    #[test]
    fn test_nested_empty_parens_fails() {
        assert_rejected("selection and ()");
    }
}