fuzzcheck/mutators/grammar/
regex.rs1use std::rc::Rc;
2
3use regex_syntax::hir::{Class, HirKind, Literal, RepetitionKind, RepetitionRange};
4
5use crate::mutators::grammar::{alternation, concatenation, literal, literal_ranges, repetition, Grammar};
6
7#[coverage(off)]
8pub(crate) fn grammar_from_regex(regex: &str) -> Rc<Grammar> {
9 let mut parser = regex_syntax::Parser::new();
10 let hir = parser.parse(regex).unwrap();
11 grammar_from_regex_hir_kind(hir.kind())
12}
13#[coverage(off)]
14pub fn grammar_from_regex_hir_kind(hir: &HirKind) -> Rc<Grammar> {
15 match hir {
16 HirKind::Empty => panic!("empty regexes are not supported"),
17 HirKind::Literal(l) => match l {
18 Literal::Unicode(l) => literal(*l),
19 Literal::Byte(_) => panic!("non-unicode regexes are not supported"),
20 },
21 HirKind::Class(class) => match class {
22 Class::Unicode(class) => {
23 let ranges = class
24 .ranges()
25 .iter()
26 .map(
27 #[coverage(off)]
28 |r| r.start()..=r.end(),
29 )
30 .collect::<Vec<_>>();
31 literal_ranges(ranges)
32 }
33 Class::Bytes(_) => panic!("non-unicode regexes are not supported"),
34 },
35 HirKind::Anchor(_) => panic!("anchors are not supported"),
36 HirKind::WordBoundary(_) => panic!("word boundaries are not supported"),
37 HirKind::Repetition(rep) => {
38 let range = match rep.kind.clone() {
39 RepetitionKind::ZeroOrOne => 0..=1u32,
40 RepetitionKind::ZeroOrMore => 0..=u32::MAX,
41 RepetitionKind::OneOrMore => 1..=u32::MAX,
42 RepetitionKind::Range(range) => match range {
43 RepetitionRange::Exactly(n) => n..=n,
44 RepetitionRange::AtLeast(n) => n..=u32::MAX,
45 RepetitionRange::Bounded(n, m) => n..=m,
46 },
47 };
48 let range = (*range.start() as usize)..=(*range.end() as usize);
49 let grammar = grammar_from_regex_hir_kind(rep.hir.kind());
50 repetition(grammar, range)
51 }
52 HirKind::Group(group) => grammar_from_regex_hir_kind(group.hir.kind()),
53 HirKind::Concat(concat) => concatenation(concat.iter().map(
54 #[coverage(off)]
55 |hir| grammar_from_regex_hir_kind(hir.kind()),
56 )),
57 HirKind::Alternation(alt) => alternation(alt.iter().map(
58 #[coverage(off)]
59 |hir| grammar_from_regex_hir_kind(hir.kind()),
60 )),
61 }
62}