1use pest::Parser;
12use pest::iterators::Pair;
13use pest::pratt_parser::{Assoc, Op, PrattParser};
14use pest_derive::Parser;
15
16use crate::ast::{ConditionExpr, Quantifier, SelectorPattern};
17use crate::error::{Result, SigmaParserError, SourceLocation};
18
/// Pest-derived parser for Sigma condition expressions.
///
/// The grammar is read from `src/sigma.pest`. The `Rule` values matched on
/// throughout this file are produced by this derive.
#[derive(Parser)]
#[grammar = "src/sigma.pest"]
struct SigmaConditionParser;
26
27pub fn parse_condition(input: &str) -> Result<ConditionExpr> {
42 let pairs = SigmaConditionParser::parse(Rule::condition, input).map_err(|e| {
43 let loc = extract_pest_location(&e);
44 SigmaParserError::Condition(e.to_string(), loc)
45 })?;
46
47 let pratt = PrattParser::new()
48 .op(Op::infix(Rule::or_op, Assoc::Left))
49 .op(Op::infix(Rule::and_op, Assoc::Left))
50 .op(Op::prefix(Rule::not_op));
51
52 let condition_pair = pairs
54 .into_iter()
55 .next()
56 .ok_or_else(|| SigmaParserError::Condition("empty condition expression".into(), None))?;
57 let expr_pair = condition_pair
58 .into_inner()
59 .find(|p| p.as_rule() == Rule::expr)
60 .ok_or_else(|| SigmaParserError::Condition("missing expr in condition".into(), None))?;
61
62 parse_expr(expr_pair, &pratt)
63}
64
65fn extract_pest_location(err: &pest::error::Error<Rule>) -> Option<SourceLocation> {
66 match err.line_col {
67 pest::error::LineColLocation::Pos((line, col)) => Some(SourceLocation {
68 line: line as u32,
69 col: col as u32,
70 }),
71 pest::error::LineColLocation::Span((line, col), _) => Some(SourceLocation {
72 line: line as u32,
73 col: col as u32,
74 }),
75 }
76}
77
/// A problem recorded while mapping parsed pairs into a `ConditionExpr`.
///
/// The Pratt mapping closures in `parse_expr` return a `ConditionExpr` rather
/// than a `Result`, so they push one of these into a shared list and
/// `parse_expr` turns the list into a single error afterwards.
struct PrattError {
    /// Description of what went wrong.
    message: String,
    /// Position in the condition text associated with the problem, if any.
    location: Option<SourceLocation>,
}
87
88fn location_from_pair(pair: &Pair<'_, Rule>) -> Option<SourceLocation> {
89 let (line, col) = pair.as_span().start_pos().line_col();
90 Some(SourceLocation {
91 line: line as u32,
92 col: col as u32,
93 })
94}
95
96fn parse_expr(pair: Pair<'_, Rule>, pratt: &PrattParser<Rule>) -> Result<ConditionExpr> {
97 let errors: std::cell::RefCell<Vec<PrattError>> = std::cell::RefCell::new(Vec::new());
100
101 let result = pratt
102 .map_primary(|primary| {
103 let loc = location_from_pair(&primary);
104 match primary.as_rule() {
105 Rule::ident => ConditionExpr::Identifier(primary.as_str().to_string()),
106 Rule::selector => parse_selector(primary).unwrap_or_else(|e| {
107 errors.borrow_mut().push(PrattError {
108 message: e.to_string(),
109 location: e.location().or(loc),
110 });
111 ConditionExpr::Identifier(String::new())
112 }),
113 Rule::expr => parse_expr(primary, pratt).unwrap_or_else(|e| {
114 errors.borrow_mut().push(PrattError {
115 message: e.to_string(),
116 location: e.location().or(loc),
117 });
118 ConditionExpr::Identifier(String::new())
119 }),
120 other => {
121 errors.borrow_mut().push(PrattError {
122 message: format!("unexpected primary rule: {other:?}"),
123 location: loc,
124 });
125 ConditionExpr::Identifier(String::new())
126 }
127 }
128 })
129 .map_prefix(|op, rhs| {
130 let loc = location_from_pair(&op);
131 match op.as_rule() {
132 Rule::not_op => ConditionExpr::Not(Box::new(rhs)),
133 other => {
134 errors.borrow_mut().push(PrattError {
135 message: format!("unexpected prefix rule: {other:?}"),
136 location: loc,
137 });
138 rhs
139 }
140 }
141 })
142 .map_infix(|lhs, op, rhs| {
143 let loc = location_from_pair(&op);
144 match op.as_rule() {
145 Rule::and_op => merge_binary(ConditionExpr::And, lhs, rhs),
146 Rule::or_op => merge_binary(ConditionExpr::Or, lhs, rhs),
147 other => {
148 errors.borrow_mut().push(PrattError {
149 message: format!("unexpected infix rule: {other:?}"),
150 location: loc,
151 });
152 lhs
153 }
154 }
155 })
156 .parse(pair.into_inner());
157
158 let collected = errors.into_inner();
159 if !collected.is_empty() {
160 let combined = collected
161 .iter()
162 .map(|e| match &e.location {
163 Some(loc) => format!("at {loc}: {}", e.message),
164 None => e.message.clone(),
165 })
166 .collect::<Vec<_>>()
167 .join("; ");
168 let first_loc = collected.iter().find_map(|e| e.location);
169 return Err(SigmaParserError::Condition(combined, first_loc));
170 }
171
172 Ok(result)
173}
174
175fn merge_binary(
178 ctor: fn(Vec<ConditionExpr>) -> ConditionExpr,
179 lhs: ConditionExpr,
180 rhs: ConditionExpr,
181) -> ConditionExpr {
182 let is_and = matches!(ctor(vec![]), ConditionExpr::And(_));
184
185 let mut args = Vec::new();
186 for expr in [lhs, rhs] {
187 match expr {
188 ConditionExpr::And(children) if is_and => args.extend(children),
189 ConditionExpr::Or(children) if !is_and => args.extend(children),
190 other => args.push(other),
191 }
192 }
193
194 ctor(args)
195}
196
197fn parse_selector(pair: Pair<'_, Rule>) -> Result<ConditionExpr> {
198 let mut quantifier_pair = None;
201 let mut target_pair = None;
202
203 for p in pair.into_inner() {
204 match p.as_rule() {
205 Rule::quantifier => quantifier_pair = Some(p),
206 Rule::selector_target => target_pair = Some(p),
207 _ => {} }
209 }
210
211 let quantifier =
212 parse_quantifier(quantifier_pair.ok_or_else(|| {
213 SigmaParserError::Condition("selector missing quantifier".into(), None)
214 })?)?;
215 let pattern = parse_selector_target(
216 target_pair
217 .ok_or_else(|| SigmaParserError::Condition("selector missing target".into(), None))?,
218 )?;
219
220 Ok(ConditionExpr::Selector {
221 quantifier,
222 pattern,
223 })
224}
225
226fn parse_quantifier(pair: Pair<'_, Rule>) -> Result<Quantifier> {
227 let inner = pair
228 .into_inner()
229 .next()
230 .ok_or_else(|| SigmaParserError::Condition("quantifier missing child".into(), None))?;
231 match inner.as_rule() {
232 Rule::all_kw => Ok(Quantifier::All),
233 Rule::any_kw => Ok(Quantifier::Any),
234 Rule::uint => {
235 let n: u64 = inner.as_str().parse().map_err(|e| {
236 SigmaParserError::Condition(format!("invalid quantifier number: {e}"), None)
237 })?;
238 if n == 1 {
239 Ok(Quantifier::Any)
240 } else {
241 Ok(Quantifier::Count(n))
242 }
243 }
244 other => Err(SigmaParserError::Condition(
245 format!("unexpected quantifier rule: {other:?}"),
246 None,
247 )),
248 }
249}
250
251fn parse_selector_target(pair: Pair<'_, Rule>) -> Result<SelectorPattern> {
252 let inner = pair
253 .into_inner()
254 .next()
255 .ok_or_else(|| SigmaParserError::Condition("selector target missing child".into(), None))?;
256 match inner.as_rule() {
257 Rule::them_kw => Ok(SelectorPattern::Them),
258 Rule::ident_pattern => Ok(SelectorPattern::Pattern(inner.as_str().to_string())),
259 other => Err(SigmaParserError::Condition(
260 format!("unexpected selector target rule: {other:?}"),
261 None,
262 )),
263 }
264}
265
#[cfg(test)]
mod tests {
    use super::*;

    // ---------------------------------------------------------------------
    // Helpers: terse constructors for expected trees and shared assertions.
    // ---------------------------------------------------------------------

    /// Identifier node with the given name.
    fn ident(name: &str) -> ConditionExpr {
        ConditionExpr::Identifier(name.to_string())
    }

    /// `not <inner>` node.
    fn negated(inner: ConditionExpr) -> ConditionExpr {
        ConditionExpr::Not(Box::new(inner))
    }

    /// `<quantifier> of <pattern>` node.
    fn selector(quantifier: Quantifier, pattern: &str) -> ConditionExpr {
        ConditionExpr::Selector {
            quantifier,
            pattern: SelectorPattern::Pattern(pattern.to_string()),
        }
    }

    /// `<quantifier> of them` node.
    fn selector_them(quantifier: Quantifier) -> ConditionExpr {
        ConditionExpr::Selector {
            quantifier,
            pattern: SelectorPattern::Them,
        }
    }

    /// Asserts that `input` parses successfully to exactly `expected`.
    #[track_caller]
    fn assert_parses_to(input: &str, expected: ConditionExpr) {
        let expr = parse_condition(input).unwrap();
        assert_eq!(expr, expected);
    }

    /// Asserts that `input` is rejected with a `Condition` error.
    #[track_caller]
    fn assert_rejected(input: &str) {
        let err = parse_condition(input).unwrap_err();
        assert!(matches!(err, SigmaParserError::Condition(_, _)));
    }

    // ---------------------------------------------------------------------
    // Identifiers and boolean operators
    // ---------------------------------------------------------------------

    #[test]
    fn test_simple_identifier() {
        assert_parses_to("selection", ident("selection"));
    }

    #[test]
    fn test_and() {
        assert_parses_to(
            "selection and filter",
            ConditionExpr::And(vec![ident("selection"), ident("filter")]),
        );
    }

    #[test]
    fn test_or() {
        assert_parses_to(
            "selection1 or selection2",
            ConditionExpr::Or(vec![ident("selection1"), ident("selection2")]),
        );
    }

    #[test]
    fn test_not() {
        assert_parses_to("not filter", negated(ident("filter")));
    }

    #[test]
    fn test_and_not() {
        assert_parses_to(
            "selection and not filter",
            ConditionExpr::And(vec![ident("selection"), negated(ident("filter"))]),
        );
    }

    #[test]
    fn test_precedence_not_and_or() {
        // Expected grouping: a or ((not b) and c)
        assert_parses_to(
            "a or not b and c",
            ConditionExpr::Or(vec![
                ident("a"),
                ConditionExpr::And(vec![negated(ident("b")), ident("c")]),
            ]),
        );
    }

    #[test]
    fn test_parentheses() {
        assert_parses_to(
            "(a or b) and c",
            ConditionExpr::And(vec![
                ConditionExpr::Or(vec![ident("a"), ident("b")]),
                ident("c"),
            ]),
        );
    }

    // ---------------------------------------------------------------------
    // Selectors
    // ---------------------------------------------------------------------

    #[test]
    fn test_selector_1_of_pattern() {
        assert_parses_to("1 of selection_*", selector(Quantifier::Any, "selection_*"));
    }

    #[test]
    fn test_selector_all_of_them() {
        assert_parses_to("all of them", selector_them(Quantifier::All));
    }

    #[test]
    fn test_selector_any_of() {
        assert_parses_to("any of selection*", selector(Quantifier::Any, "selection*"));
    }

    #[test]
    fn test_complex_condition() {
        assert_parses_to(
            "selection_main and 1 of selection_dword_* and not 1 of filter_optional_*",
            ConditionExpr::And(vec![
                ident("selection_main"),
                selector(Quantifier::Any, "selection_dword_*"),
                negated(selector(Quantifier::Any, "filter_optional_*")),
            ]),
        );
    }

    #[test]
    fn test_identifier_with_keyword_substring() {
        // The embedded "and" must not be split out as an operator.
        assert_parses_to("selection_and_filter", ident("selection_and_filter"));
    }

    #[test]
    fn test_identifier_with_hyphen() {
        assert_parses_to(
            "my-selection and my-filter",
            ConditionExpr::And(vec![ident("my-selection"), ident("my-filter")]),
        );
    }

    #[test]
    fn test_triple_and_flattened() {
        assert_parses_to(
            "a and b and c",
            ConditionExpr::And(vec![ident("a"), ident("b"), ident("c")]),
        );
    }

    #[test]
    fn test_triple_or_flattened() {
        assert_parses_to(
            "a or b or c",
            ConditionExpr::Or(vec![ident("a"), ident("b"), ident("c")]),
        );
    }

    #[test]
    fn test_all_of_selection_and_not_filter() {
        assert_parses_to(
            "all of selection_powershell_* or all of selection_wmic_*",
            ConditionExpr::Or(vec![
                selector(Quantifier::All, "selection_powershell_*"),
                selector(Quantifier::All, "selection_wmic_*"),
            ]),
        );
    }

    #[test]
    fn test_real_world_complex() {
        assert_parses_to(
            "selection_key and (all of selection_powershell_* or all of selection_wmic_*)",
            ConditionExpr::And(vec![
                ident("selection_key"),
                ConditionExpr::Or(vec![
                    selector(Quantifier::All, "selection_powershell_*"),
                    selector(Quantifier::All, "selection_wmic_*"),
                ]),
            ]),
        );
    }

    #[test]
    fn test_1_of_them() {
        assert_parses_to("1 of them", selector_them(Quantifier::Any));
    }

    #[test]
    fn test_count_of() {
        assert_parses_to(
            "3 of selection_*",
            selector(Quantifier::Count(3), "selection_*"),
        );
    }

    #[test]
    fn test_not_1_of_filter() {
        assert_parses_to(
            "selection and not 1 of filter*",
            ConditionExpr::And(vec![
                ident("selection"),
                negated(selector(Quantifier::Any, "filter*")),
            ]),
        );
    }

    // ---------------------------------------------------------------------
    // Selector patterns with several or unusually placed wildcards
    // ---------------------------------------------------------------------

    #[test]
    fn test_selector_multi_wildcard_pattern() {
        assert_parses_to(
            "1 of selection_*_*",
            selector(Quantifier::Any, "selection_*_*"),
        );
    }

    #[test]
    fn test_selector_leading_wildcard_pattern() {
        assert_parses_to(
            "all of *_selection_*",
            selector(Quantifier::All, "*_selection_*"),
        );
    }

    #[test]
    fn test_selector_bare_wildcard() {
        assert_parses_to("1 of *", selector(Quantifier::Any, "*"));
    }

    #[test]
    fn test_selector_triple_wildcard_segment() {
        assert_parses_to("any of sel_*_*_*", selector(Quantifier::Any, "sel_*_*_*"));
    }

    #[test]
    fn test_multi_wildcard_in_complex_condition() {
        assert_parses_to(
            "selection_main and 1 of sel_*_* and not 1 of filter_*_*",
            ConditionExpr::And(vec![
                ident("selection_main"),
                selector(Quantifier::Any, "sel_*_*"),
                negated(selector(Quantifier::Any, "filter_*_*")),
            ]),
        );
    }

    #[test]
    fn test_selector_wildcard_only_prefix() {
        assert_parses_to("all of *suffix", selector(Quantifier::All, "*suffix"));
    }

    // ---------------------------------------------------------------------
    // Inputs that must be rejected
    // ---------------------------------------------------------------------

    #[test]
    fn test_empty_string_fails() {
        assert_rejected("");
    }

    #[test]
    fn test_whitespace_only_fails() {
        assert_rejected(" ");
    }

    #[test]
    fn test_trailing_operator_fails() {
        assert_rejected("selection and");
    }

    #[test]
    fn test_leading_operator_fails() {
        assert_rejected("and selection");
    }

    #[test]
    fn test_double_operator_fails() {
        assert_rejected("selection and and filter");
    }

    #[test]
    fn test_unbalanced_open_paren_fails() {
        assert_rejected("(selection and filter");
    }

    #[test]
    fn test_unbalanced_close_paren_fails() {
        assert_rejected("selection and filter)");
    }

    #[test]
    fn test_empty_parens_fails() {
        assert_rejected("()");
    }

    #[test]
    fn test_only_operator_fails() {
        assert_rejected("and");
    }

    #[test]
    fn test_only_not_fails() {
        assert_rejected("not");
    }

    #[test]
    fn test_or_alone_fails() {
        assert_rejected("or");
    }

    #[test]
    fn test_incomplete_selector_missing_target_fails() {
        assert_rejected("1 of");
    }

    #[test]
    fn test_of_without_quantifier_fails() {
        assert_rejected("of selection_*");
    }

    #[test]
    fn test_pest_error_carries_location() {
        let err = parse_condition("selection and").unwrap_err();
        if let SigmaParserError::Condition(_, loc) = &err {
            assert!(
                loc.is_some(),
                "pest parse errors should carry source location"
            );
        } else {
            panic!("Expected Condition error");
        }
    }

    #[test]
    fn test_invalid_characters_fails() {
        assert_rejected("selection @ filter");
    }

    #[test]
    fn test_nested_empty_parens_fails() {
        assert_rejected("selection and ()");
    }
}