fuzzcheck/mutators/grammar/
regex.rs

1use std::rc::Rc;
2
3use regex_syntax::hir::{Class, HirKind, Literal, RepetitionKind, RepetitionRange};
4
5use crate::mutators::grammar::{alternation, concatenation, literal, literal_ranges, repetition, Grammar};
6
7#[coverage(off)]
8pub(crate) fn grammar_from_regex(regex: &str) -> Rc<Grammar> {
9    let mut parser = regex_syntax::Parser::new();
10    let hir = parser.parse(regex).unwrap();
11    grammar_from_regex_hir_kind(hir.kind())
12}
13#[coverage(off)]
14pub fn grammar_from_regex_hir_kind(hir: &HirKind) -> Rc<Grammar> {
15    match hir {
16        HirKind::Empty => panic!("empty regexes are not supported"),
17        HirKind::Literal(l) => match l {
18            Literal::Unicode(l) => literal(*l),
19            Literal::Byte(_) => panic!("non-unicode regexes are not supported"),
20        },
21        HirKind::Class(class) => match class {
22            Class::Unicode(class) => {
23                let ranges = class
24                    .ranges()
25                    .iter()
26                    .map(
27                        #[coverage(off)]
28                        |r| r.start()..=r.end(),
29                    )
30                    .collect::<Vec<_>>();
31                literal_ranges(ranges)
32            }
33            Class::Bytes(_) => panic!("non-unicode regexes are not supported"),
34        },
35        HirKind::Anchor(_) => panic!("anchors are not supported"),
36        HirKind::WordBoundary(_) => panic!("word boundaries are not supported"),
37        HirKind::Repetition(rep) => {
38            let range = match rep.kind.clone() {
39                RepetitionKind::ZeroOrOne => 0..=1u32,
40                RepetitionKind::ZeroOrMore => 0..=u32::MAX,
41                RepetitionKind::OneOrMore => 1..=u32::MAX,
42                RepetitionKind::Range(range) => match range {
43                    RepetitionRange::Exactly(n) => n..=n,
44                    RepetitionRange::AtLeast(n) => n..=u32::MAX,
45                    RepetitionRange::Bounded(n, m) => n..=m,
46                },
47            };
48            let range = (*range.start() as usize)..=(*range.end() as usize);
49            let grammar = grammar_from_regex_hir_kind(rep.hir.kind());
50            repetition(grammar, range)
51        }
52        HirKind::Group(group) => grammar_from_regex_hir_kind(group.hir.kind()),
53        HirKind::Concat(concat) => concatenation(concat.iter().map(
54            #[coverage(off)]
55            |hir| grammar_from_regex_hir_kind(hir.kind()),
56        )),
57        HirKind::Alternation(alt) => alternation(alt.iter().map(
58            #[coverage(off)]
59            |hir| grammar_from_regex_hir_kind(hir.kind()),
60        )),
61    }
62}