1use pest::Parser;
12use pest::iterators::Pair;
13use pest::pratt_parser::{Assoc, Op, PrattParser};
14use pest_derive::Parser;
15
16use crate::ast::{ConditionExpr, Quantifier, SelectorPattern};
17use crate::error::{Result, SigmaParserError, SourceLocation};
18
19#[derive(Parser)]
24#[grammar = "src/sigma.pest"]
25struct SigmaConditionParser;
26
/// Largest condition string, in bytes, that `parse_condition` accepts (64 KiB).
const MAX_CONDITION_LEN: usize = 64 * 1024;

/// Deepest expression nesting that condition parsing accepts before it
/// reports an error.
const MAX_CONDITION_DEPTH: usize = 64;
33
34pub fn parse_condition(input: &str) -> Result<ConditionExpr> {
45 if input.len() > MAX_CONDITION_LEN {
46 return Err(SigmaParserError::ConditionTooLong(
47 input.len(),
48 MAX_CONDITION_LEN,
49 ));
50 }
51
52 let pairs = SigmaConditionParser::parse(Rule::condition, input).map_err(|e| {
53 let loc = extract_pest_location(&e);
54 SigmaParserError::Condition(e.to_string(), loc)
55 })?;
56
57 let pratt = PrattParser::new()
58 .op(Op::infix(Rule::or_op, Assoc::Left))
59 .op(Op::infix(Rule::and_op, Assoc::Left))
60 .op(Op::prefix(Rule::not_op));
61
62 let condition_pair = pairs
64 .into_iter()
65 .next()
66 .ok_or_else(|| SigmaParserError::Condition("empty condition expression".into(), None))?;
67 let expr_pair = condition_pair
68 .into_inner()
69 .find(|p| p.as_rule() == Rule::expr)
70 .ok_or_else(|| SigmaParserError::Condition("missing expr in condition".into(), None))?;
71
72 let depth = std::cell::Cell::new(0usize);
73 parse_expr(expr_pair, &pratt, &depth)
74}
75
76fn extract_pest_location(err: &pest::error::Error<Rule>) -> Option<SourceLocation> {
77 match err.line_col {
78 pest::error::LineColLocation::Pos((line, col)) => Some(SourceLocation {
79 line: line as u32,
80 col: col as u32,
81 }),
82 pest::error::LineColLocation::Span((line, col), _) => Some(SourceLocation {
83 line: line as u32,
84 col: col as u32,
85 }),
86 }
87}
88
89struct PrattError {
95 message: String,
96 location: Option<SourceLocation>,
97}
98
99fn location_from_pair(pair: &Pair<'_, Rule>) -> Option<SourceLocation> {
100 let (line, col) = pair.as_span().start_pos().line_col();
101 Some(SourceLocation {
102 line: line as u32,
103 col: col as u32,
104 })
105}
106
107fn parse_expr(
108 pair: Pair<'_, Rule>,
109 pratt: &PrattParser<Rule>,
110 depth: &std::cell::Cell<usize>,
111) -> Result<ConditionExpr> {
112 let current = depth.get();
113 if current > MAX_CONDITION_DEPTH {
114 return Err(SigmaParserError::Condition(
115 format!("condition nesting exceeds maximum depth ({MAX_CONDITION_DEPTH})"),
116 None,
117 ));
118 }
119 depth.set(current + 1);
120
121 let errors: std::cell::RefCell<Vec<PrattError>> = std::cell::RefCell::new(Vec::new());
124
125 let result = pratt
126 .map_primary(|primary| {
127 let loc = location_from_pair(&primary);
128 match primary.as_rule() {
129 Rule::ident => ConditionExpr::Identifier(primary.as_str().to_string()),
130 Rule::selector => parse_selector(primary).unwrap_or_else(|e| {
131 errors.borrow_mut().push(PrattError {
132 message: e.to_string(),
133 location: e.location().or(loc),
134 });
135 ConditionExpr::Identifier(String::new())
136 }),
137 Rule::expr => parse_expr(primary, pratt, depth).unwrap_or_else(|e| {
138 errors.borrow_mut().push(PrattError {
139 message: e.to_string(),
140 location: e.location().or(loc),
141 });
142 ConditionExpr::Identifier(String::new())
143 }),
144 other => {
145 errors.borrow_mut().push(PrattError {
146 message: format!("unexpected primary rule: {other:?}"),
147 location: loc,
148 });
149 ConditionExpr::Identifier(String::new())
150 }
151 }
152 })
153 .map_prefix(|op, rhs| {
154 let loc = location_from_pair(&op);
155 match op.as_rule() {
156 Rule::not_op => ConditionExpr::Not(Box::new(rhs)),
157 other => {
158 errors.borrow_mut().push(PrattError {
159 message: format!("unexpected prefix rule: {other:?}"),
160 location: loc,
161 });
162 rhs
163 }
164 }
165 })
166 .map_infix(|lhs, op, rhs| {
167 let loc = location_from_pair(&op);
168 match op.as_rule() {
169 Rule::and_op => merge_binary(ConditionExpr::And, lhs, rhs),
170 Rule::or_op => merge_binary(ConditionExpr::Or, lhs, rhs),
171 other => {
172 errors.borrow_mut().push(PrattError {
173 message: format!("unexpected infix rule: {other:?}"),
174 location: loc,
175 });
176 lhs
177 }
178 }
179 })
180 .parse(pair.into_inner());
181
182 depth.set(depth.get().saturating_sub(1));
183
184 let collected = errors.into_inner();
185 if !collected.is_empty() {
186 let combined = collected
187 .iter()
188 .map(|e| match &e.location {
189 Some(loc) => format!("at {loc}: {}", e.message),
190 None => e.message.clone(),
191 })
192 .collect::<Vec<_>>()
193 .join("; ");
194 let first_loc = collected.iter().find_map(|e| e.location);
195 return Err(SigmaParserError::Condition(combined, first_loc));
196 }
197
198 Ok(result)
199}
200
201fn merge_binary(
204 ctor: fn(Vec<ConditionExpr>) -> ConditionExpr,
205 lhs: ConditionExpr,
206 rhs: ConditionExpr,
207) -> ConditionExpr {
208 let is_and = matches!(ctor(vec![]), ConditionExpr::And(_));
210
211 let mut args = Vec::new();
212 for expr in [lhs, rhs] {
213 match expr {
214 ConditionExpr::And(children) if is_and => args.extend(children),
215 ConditionExpr::Or(children) if !is_and => args.extend(children),
216 other => args.push(other),
217 }
218 }
219
220 ctor(args)
221}
222
223fn parse_selector(pair: Pair<'_, Rule>) -> Result<ConditionExpr> {
224 let mut quantifier_pair = None;
227 let mut target_pair = None;
228
229 for p in pair.into_inner() {
230 match p.as_rule() {
231 Rule::quantifier => quantifier_pair = Some(p),
232 Rule::selector_target => target_pair = Some(p),
233 _ => {} }
235 }
236
237 let quantifier =
238 parse_quantifier(quantifier_pair.ok_or_else(|| {
239 SigmaParserError::Condition("selector missing quantifier".into(), None)
240 })?)?;
241 let pattern = parse_selector_target(
242 target_pair
243 .ok_or_else(|| SigmaParserError::Condition("selector missing target".into(), None))?,
244 )?;
245
246 Ok(ConditionExpr::Selector {
247 quantifier,
248 pattern,
249 })
250}
251
252fn parse_quantifier(pair: Pair<'_, Rule>) -> Result<Quantifier> {
253 let inner = pair
254 .into_inner()
255 .next()
256 .ok_or_else(|| SigmaParserError::Condition("quantifier missing child".into(), None))?;
257 match inner.as_rule() {
258 Rule::all_kw => Ok(Quantifier::All),
259 Rule::any_kw => Ok(Quantifier::Any),
260 Rule::uint => {
261 let n: u64 = inner.as_str().parse().map_err(|e| {
262 SigmaParserError::Condition(format!("invalid quantifier number: {e}"), None)
263 })?;
264 if n == 1 {
265 Ok(Quantifier::Any)
266 } else {
267 Ok(Quantifier::Count(n))
268 }
269 }
270 other => Err(SigmaParserError::Condition(
271 format!("unexpected quantifier rule: {other:?}"),
272 None,
273 )),
274 }
275}
276
277fn parse_selector_target(pair: Pair<'_, Rule>) -> Result<SelectorPattern> {
278 let inner = pair
279 .into_inner()
280 .next()
281 .ok_or_else(|| SigmaParserError::Condition("selector target missing child".into(), None))?;
282 match inner.as_rule() {
283 Rule::them_kw => Ok(SelectorPattern::Them),
284 Rule::ident_pattern => Ok(SelectorPattern::Pattern(inner.as_str().to_string())),
285 other => Err(SigmaParserError::Condition(
286 format!("unexpected selector target rule: {other:?}"),
287 None,
288 )),
289 }
290}
291
#[cfg(test)]
mod tests {
    use super::*;

    // ---- helpers -----------------------------------------------------------

    /// Parses `input`, panicking if parsing fails.
    fn parsed(input: &str) -> ConditionExpr {
        parse_condition(input).unwrap()
    }

    /// Builds an identifier node.
    fn ident(name: &str) -> ConditionExpr {
        ConditionExpr::Identifier(name.to_string())
    }

    /// Wraps `inner` in a `Not` node.
    fn negated(inner: ConditionExpr) -> ConditionExpr {
        ConditionExpr::Not(Box::new(inner))
    }

    /// Builds a selector node over a name pattern.
    fn of_pattern(quantifier: Quantifier, pattern: &str) -> ConditionExpr {
        ConditionExpr::Selector {
            quantifier,
            pattern: SelectorPattern::Pattern(pattern.to_string()),
        }
    }

    /// Builds a selector node over `them`.
    fn of_them(quantifier: Quantifier) -> ConditionExpr {
        ConditionExpr::Selector {
            quantifier,
            pattern: SelectorPattern::Them,
        }
    }

    /// Asserts that `input` is rejected with a `Condition` error.
    fn assert_condition_error(input: &str) {
        let err = parse_condition(input).unwrap_err();
        assert!(matches!(err, SigmaParserError::Condition(_, _)));
    }

    // ---- identifiers and operators ------------------------------------------

    #[test]
    fn test_simple_identifier() {
        assert_eq!(parsed("selection"), ident("selection"));
    }

    #[test]
    fn test_and() {
        assert_eq!(
            parsed("selection and filter"),
            ConditionExpr::And(vec![ident("selection"), ident("filter")])
        );
    }

    #[test]
    fn test_or() {
        assert_eq!(
            parsed("selection1 or selection2"),
            ConditionExpr::Or(vec![ident("selection1"), ident("selection2")])
        );
    }

    #[test]
    fn test_not() {
        assert_eq!(parsed("not filter"), negated(ident("filter")));
    }

    #[test]
    fn test_and_not() {
        assert_eq!(
            parsed("selection and not filter"),
            ConditionExpr::And(vec![ident("selection"), negated(ident("filter"))])
        );
    }

    #[test]
    fn test_precedence_not_and_or() {
        // `not` binds tighter than `and`, which binds tighter than `or`.
        assert_eq!(
            parsed("a or not b and c"),
            ConditionExpr::Or(vec![
                ident("a"),
                ConditionExpr::And(vec![negated(ident("b")), ident("c")]),
            ])
        );
    }

    #[test]
    fn test_parentheses() {
        assert_eq!(
            parsed("(a or b) and c"),
            ConditionExpr::And(vec![
                ConditionExpr::Or(vec![ident("a"), ident("b")]),
                ident("c"),
            ])
        );
    }

    // ---- selectors -----------------------------------------------------------

    #[test]
    fn test_selector_1_of_pattern() {
        assert_eq!(
            parsed("1 of selection_*"),
            of_pattern(Quantifier::Any, "selection_*")
        );
    }

    #[test]
    fn test_selector_all_of_them() {
        assert_eq!(parsed("all of them"), of_them(Quantifier::All));
    }

    #[test]
    fn test_selector_any_of() {
        assert_eq!(
            parsed("any of selection*"),
            of_pattern(Quantifier::Any, "selection*")
        );
    }

    #[test]
    fn test_complex_condition() {
        assert_eq!(
            parsed("selection_main and 1 of selection_dword_* and not 1 of filter_optional_*"),
            ConditionExpr::And(vec![
                ident("selection_main"),
                of_pattern(Quantifier::Any, "selection_dword_*"),
                negated(of_pattern(Quantifier::Any, "filter_optional_*")),
            ])
        );
    }

    #[test]
    fn test_identifier_with_keyword_substring() {
        // `and` inside an identifier must not be read as an operator.
        assert_eq!(parsed("selection_and_filter"), ident("selection_and_filter"));
    }

    #[test]
    fn test_identifier_with_hyphen() {
        assert_eq!(
            parsed("my-selection and my-filter"),
            ConditionExpr::And(vec![ident("my-selection"), ident("my-filter")])
        );
    }

    #[test]
    fn test_triple_and_flattened() {
        assert_eq!(
            parsed("a and b and c"),
            ConditionExpr::And(vec![ident("a"), ident("b"), ident("c")])
        );
    }

    #[test]
    fn test_triple_or_flattened() {
        assert_eq!(
            parsed("a or b or c"),
            ConditionExpr::Or(vec![ident("a"), ident("b"), ident("c")])
        );
    }

    #[test]
    fn test_all_of_selection_and_not_filter() {
        assert_eq!(
            parsed("all of selection_powershell_* or all of selection_wmic_*"),
            ConditionExpr::Or(vec![
                of_pattern(Quantifier::All, "selection_powershell_*"),
                of_pattern(Quantifier::All, "selection_wmic_*"),
            ])
        );
    }

    #[test]
    fn test_real_world_complex() {
        assert_eq!(
            parsed("selection_key and (all of selection_powershell_* or all of selection_wmic_*)"),
            ConditionExpr::And(vec![
                ident("selection_key"),
                ConditionExpr::Or(vec![
                    of_pattern(Quantifier::All, "selection_powershell_*"),
                    of_pattern(Quantifier::All, "selection_wmic_*"),
                ]),
            ])
        );
    }

    #[test]
    fn test_1_of_them() {
        assert_eq!(parsed("1 of them"), of_them(Quantifier::Any));
    }

    #[test]
    fn test_count_of() {
        assert_eq!(
            parsed("3 of selection_*"),
            of_pattern(Quantifier::Count(3), "selection_*")
        );
    }

    #[test]
    fn test_not_1_of_filter() {
        assert_eq!(
            parsed("selection and not 1 of filter*"),
            ConditionExpr::And(vec![
                ident("selection"),
                negated(of_pattern(Quantifier::Any, "filter*")),
            ])
        );
    }

    // ---- wildcard patterns ---------------------------------------------------

    #[test]
    fn test_selector_multi_wildcard_pattern() {
        assert_eq!(
            parsed("1 of selection_*_*"),
            of_pattern(Quantifier::Any, "selection_*_*")
        );
    }

    #[test]
    fn test_selector_leading_wildcard_pattern() {
        assert_eq!(
            parsed("all of *_selection_*"),
            of_pattern(Quantifier::All, "*_selection_*")
        );
    }

    #[test]
    fn test_selector_bare_wildcard() {
        assert_eq!(parsed("1 of *"), of_pattern(Quantifier::Any, "*"));
    }

    #[test]
    fn test_selector_triple_wildcard_segment() {
        assert_eq!(
            parsed("any of sel_*_*_*"),
            of_pattern(Quantifier::Any, "sel_*_*_*")
        );
    }

    #[test]
    fn test_multi_wildcard_in_complex_condition() {
        assert_eq!(
            parsed("selection_main and 1 of sel_*_* and not 1 of filter_*_*"),
            ConditionExpr::And(vec![
                ident("selection_main"),
                of_pattern(Quantifier::Any, "sel_*_*"),
                negated(of_pattern(Quantifier::Any, "filter_*_*")),
            ])
        );
    }

    #[test]
    fn test_selector_wildcard_only_prefix() {
        assert_eq!(
            parsed("all of *suffix"),
            of_pattern(Quantifier::All, "*suffix")
        );
    }

    // ---- rejected input ------------------------------------------------------

    #[test]
    fn test_empty_string_fails() {
        assert_condition_error("");
    }

    #[test]
    fn test_whitespace_only_fails() {
        assert_condition_error(" ");
    }

    #[test]
    fn test_trailing_operator_fails() {
        assert_condition_error("selection and");
    }

    #[test]
    fn test_leading_operator_fails() {
        assert_condition_error("and selection");
    }

    #[test]
    fn test_double_operator_fails() {
        assert_condition_error("selection and and filter");
    }

    #[test]
    fn test_unbalanced_open_paren_fails() {
        assert_condition_error("(selection and filter");
    }

    #[test]
    fn test_unbalanced_close_paren_fails() {
        assert_condition_error("selection and filter)");
    }

    #[test]
    fn test_empty_parens_fails() {
        assert_condition_error("()");
    }

    #[test]
    fn test_only_operator_fails() {
        assert_condition_error("and");
    }

    #[test]
    fn test_only_not_fails() {
        assert_condition_error("not");
    }

    #[test]
    fn test_or_alone_fails() {
        assert_condition_error("or");
    }

    #[test]
    fn test_incomplete_selector_missing_target_fails() {
        assert_condition_error("1 of");
    }

    #[test]
    fn test_of_without_quantifier_fails() {
        assert_condition_error("of selection_*");
    }

    #[test]
    fn test_pest_error_carries_location() {
        let err = parse_condition("selection and").unwrap_err();
        if let SigmaParserError::Condition(_, loc) = &err {
            assert!(
                loc.is_some(),
                "pest parse errors should carry source location"
            );
        } else {
            panic!("Expected Condition error");
        }
    }

    #[test]
    fn test_invalid_characters_fails() {
        assert_condition_error("selection @ filter");
    }

    #[test]
    fn test_nested_empty_parens_fails() {
        assert_condition_error("selection and ()");
    }

    // ---- size and depth limits -----------------------------------------------

    #[test]
    fn condition_too_long_returns_error() {
        let oversized = "a".repeat(MAX_CONDITION_LEN + 1);
        let err = parse_condition(&oversized).unwrap_err();
        assert!(
            matches!(err, SigmaParserError::ConditionTooLong(_, _)),
            "expected ConditionTooLong, got: {err}"
        );
    }

    #[test]
    fn moderate_condition_still_parses() {
        // Ten levels of parentheses are well within the nesting limit.
        match parsed("((((((((((a and b))))))))))") {
            ConditionExpr::And(children) => assert_eq!(children.len(), 2),
            other => panic!("expected And, got {other:?}"),
        }
    }
}