spindle_lib/
grammar.rs

1use crate::error::ErrorRepr;
2use crate::Error;
3use crate::{ir, regex::Regex, reserved::*, Visitor};
4
5use arbitrary::{Arbitrary, Unstructured};
6use std::{collections::HashSet, fmt, str::FromStr};
7
8/// A state machine that produces `Arbitrary` matching expressions or byte sequences from [`Unstructured`](https://docs.rs/arbitrary/latest/arbitrary/struct.Unstructured.html).
9///
10/// # Implementation
11/// ## Construction
12/// `Grammar` is constructed using `from_str` of a string that specifies the syntax of expressions:
13/// - A peg parser converts the string into an "intermediary representation" AST (in ir.rs).
14/// - The IR is validated for duplicate and conflicting rule definitions
15/// - The IR is converted to a state machine in which regex is parsed and rule definitions are indexed.
16///
17/// ## Expression Generation
18/// `Unstructured` is used to generate `arbitrary` choices and loops to traverse the state machine.
19/// When a "leaf" state is reached, e.g. a pre-defined rule, regex, or string literal, `Unstructured`
20/// is used to write arbitrary data (except for string literals) to an output buffer.
21#[derive(Debug)]
22pub struct Grammar {
23    rules: Vec<Expr>,
24
25    // `how_many[i]` == number of possible traversals at the ith depth
26    // i.e. memoized results of `how_many` calculated on construction.
27    how_many: Vec<Option<u64>>,
28
29    // `reachable[i][j]` == first depth that is reachable by the `i`th branch `Expr`'s `j`th branch.
30    // Branch `Expr`s are `Expr::Optional`, `Expr::Or`, `Expr::Repetition`.
31    // `reachable` is used in `expression`: branches that are not reachable at the current depth are not explored.
32    reachable: Vec<Vec<usize>>,
33}
34
35impl Grammar {
36    /// Returns a resulting `Visitor` after an arbitirary state machine traversal.
37    ///
38    /// The state machine traversal starts at the first rule in the grammar.
39    ///
40    /// # Parameters
41    /// `max_depth` is nesting limit of rule referencing during the traversal.
42    /// For example, the below grammar generates numbers less than or equal to the `max_depth` parameter.
43    /// ```text
44    /// one   : "1" | two ;
45    /// two   : "2" | three ;
46    /// three : "3" ;
47    /// ```
48    pub fn expression<V: Visitor>(
49        &self,
50        u: &mut Unstructured<'_>,
51        max_depth: Option<usize>,
52    ) -> arbitrary::Result<V> {
53        let mut visitor = V::new();
54        let mut to_write = vec![(&self.rules[0], max_depth.unwrap_or(usize::MAX))]; // always start at first rule
55
56        while let Some((expr, depth)) = to_write.pop() {
57            if depth == 0 {
58                return Err(arbitrary::Error::IncorrectFormat);
59            }
60
61            match expr {
62                Expr::Or(v, i) => {
63                    // TODO: might be better way to choose a random item from a filtered list
64                    // will explore ways to avoid the vec allocation when we have benchmarks.
65                    let avail: Vec<_> = (0..v.len())
66                        .filter(|j| self.reachable(depth, *i, *j))
67                        .collect();
68                    let arb_index = u.choose_iter(avail.iter())?;
69                    to_write.push((&v[*arb_index], depth));
70                    visitor.visit_or(*arb_index);
71                }
72                Expr::Concat(v) => {
73                    to_write.extend(v.iter().map(|x| (x, depth)));
74                    visitor.visit_concat();
75                }
76                Expr::Optional(x, i) => {
77                    let b = self.reachable(depth, *i, 0) && bool::arbitrary(u)?;
78                    if b {
79                        to_write.push((x, depth));
80                    }
81                    visitor.visit_optional(b);
82                }
83                Expr::Repetition(x, min_rep, max_rep, i) => {
84                    let mut reps = 0;
85                    if self.reachable(depth, *i, 0) {
86                        u.arbitrary_loop(Some(*min_rep), Some(*max_rep), |_| {
87                            to_write.push((x, depth));
88                            reps += 1;
89                            Ok(std::ops::ControlFlow::Continue(()))
90                        })?;
91                    }
92                    visitor.visit_repetition(reps);
93                }
94                Expr::Reference(index) => {
95                    to_write.push((&self.rules[*index], depth - 1));
96                    visitor.visit_reference(*index);
97                }
98                Expr::Literal(s) => visitor.visit_literal(s.as_str()),
99                Expr::Bytes(b) => visitor.visit_bytes(b),
100                Expr::Regex(regex) => regex.visit(&mut visitor, u)?,
101                Expr::Group(x) => to_write.push((x, depth)),
102                Expr::Predefined(v) => v.visit(&mut visitor, u)?,
103            }
104        }
105        Ok(visitor)
106    }
107
108    /// Returns `true` if the `i`th branch `Expr`s `j`th branch is reachable at `depth`.
109    fn reachable(&self, depth: usize, i: usize, j: usize) -> bool {
110        depth > self.reachable[i][j]
111    }
112
113    /// Returns the number of possible state machine traversals for
114    /// this state machine or `None` if the result exceeds `u64::MAX`.
115    ///
116    /// # Parameters
117    /// `max_depth` is the maximum nested references that the traversal is allowed.
118    /// A `max_depth` of 0 will always return 0.
119    /// `grammar.how_many(depth)` is the number of unique values possible from
120    /// `grammar.expression::<u64>(u, depth)`, barring hash collisions.
121    ///
122    /// # Usage
123    /// Provides a rough possible number of equivalence classes of the grammar,
124    /// which is useful for estimating overall coverage as the fuzzer discovers more
125    /// classes over time.
126    ///
127    /// # Limitations
128    /// 1. No traversals are counted inside of Regex and pre-defined (e.g. String) rules
129    /// i.e. are counted as 1 possible traversal even though many regex expressions or Strings
130    /// are possible.
131    ///
132    /// 2. The result is not aware of duplicate outputs, e.g.
133    /// ```text
134    /// expr : "foo" | "foo" ;
135    /// ```
136    /// counts as 2 traversals, even though every output is "foo".
137    pub fn how_many(&self, max_depth: Option<usize>) -> Option<u64> {
138        let target_depth = std::cmp::min(max_depth.unwrap_or(usize::MAX), self.how_many.len() - 1);
139        self.how_many[target_depth]
140    }
141}
142
143/// Pretty prints the state machine.
144///
145/// It's helpful to check if the compiled state machine matches
146/// what is expected from the the un-parsed grammar
147/// (the printed rules are more verbose and order of operations is clearer)
148impl fmt::Display for Grammar {
149    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
150        for (i, rule) in self.rules.iter().enumerate() {
151            writeln!(f, "{}: {}", i, rule)?;
152        }
153        Ok(())
154    }
155}
156
157impl FromStr for Grammar {
158    type Err = Error;
159
160    fn from_str(s: &str) -> Result<Self, Self::Err> {
161        let parsed_ir = ir::bnf::expr(s).map_err(|e| Error(ErrorRepr::Grammar(e)))?;
162        Self::try_from(parsed_ir)
163    }
164}
165
166impl TryFrom<Vec<(String, ir::Expr)>> for Grammar {
167    type Error = Error;
168
169    fn try_from(value: Vec<(String, ir::Expr)>) -> Result<Self, Self::Error> {
170        let (names, ir_exprs): (Vec<String>, Vec<ir::Expr>) = value.into_iter().unzip();
171
172        if let Some(dups) = find_duplicates(&names) {
173            return Err(Error(ErrorRepr::DuplicateVars(dups)));
174        }
175
176        if let Some(res) = filter_reserved_keywords(&names) {
177            return Err(Error(ErrorRepr::Reserved(res)));
178        }
179
180        let mut reachable: Vec<Vec<usize>> = Vec::new();
181        let rules = ir_exprs
182            .into_iter()
183            .map(|ir_expr| Expr::try_new(ir_expr, &names, &mut reachable))
184            .collect::<Result<Vec<Expr>, _>>()?;
185
186        // Use a bottom up approach for calculating the number of traversals:
187        // 1. all rules have 0 traversals at depth 0.
188        // 2. prev[i] == `how_many` for the ith rule at the previously calculated depth.
189        // 3. dp[i] == `how_many` for the ith rule the current depth which depends on `prev`.
190        // 4. finally return `how_many` for the first (start) rule.
191        let mut how_many = vec![Some(0u64)];
192        let mut prev = vec![Some(0u64); rules.len()];
193        loop {
194            let dp: Vec<_> = rules
195                .iter()
196                .map(|r| r.how_many(&rules, &prev, &mut reachable))
197                .collect();
198
199            how_many.push(dp[0]);
200
201            // discovered all possible or already exceeded `u64::MAX`
202            if dp == prev || dp[0] == None {
203                break;
204            }
205
206            prev = dp;
207        }
208
209        assert_eq!(names.len(), rules.len());
210        Ok(Self {
211            rules,
212            reachable,
213            how_many,
214        })
215    }
216}
217
218fn find_duplicates(names: &[String]) -> Option<HashSet<String>> {
219    let mut set: HashSet<String> = names.iter().cloned().collect();
220    let dups: HashSet<String> = names.iter().filter(|&n| !set.remove(n)).cloned().collect();
221    (!dups.is_empty()).then_some(dups)
222}
223
224/// Branch `Expr`s (`Optional`, `Or`, `Repetition`) contain a `usize`
225/// which is index of the branch in the state machine (pre-ordered).
226#[derive(Debug)]
227enum Expr {
228    Or(Vec<Expr>, usize),
229    Concat(Vec<Expr>),
230    Optional(Box<Expr>, usize),
231    Repetition(Box<Expr>, u32, u32, usize),
232    Reference(usize),
233    Literal(String),
234    Regex(Regex),
235    Bytes(Vec<u8>),
236    Group(Box<Expr>),
237    Predefined(Predefined),
238}
239
240impl Expr {
241    fn add(x: Option<u64>, y: Option<u64>) -> Option<u64> {
242        x?.checked_add(y?)
243    }
244
245    fn mul(x: Option<u64>, y: Option<u64>) -> Option<u64> {
246        x?.checked_mul(y?)
247    }
248
249    /// See [`Grammar::how_many`].
250    ///
251    /// `mem` is previously calculated `how_many` for `depth - 1` for each `Reference::Expr`/rule in `rules`.
252    /// `reachable` is semi-built `Grammar.reachable`. If the expr is not reachable, `reachable[i][j]` is incremented to
253    /// finally indicate the first depth in which the jth branch in ith rule is reachable.
254    fn how_many(
255        &self,
256        rules: &[Expr],
257        mem: &[Option<u64>],
258        reachable: &mut [Vec<usize>],
259    ) -> Option<u64> {
260        match self {
261            Self::Or(x, i) => {
262                let mut res = Some(0u64);
263                for (j, child) in x.iter().enumerate() {
264                    let sub_res = child.how_many(rules, mem, reachable);
265                    let child_reachable = sub_res.map_or(true, |x| x > 0);
266                    if !child_reachable {
267                        reachable[*i][j] += 1;
268                    }
269                    res = Self::add(res, sub_res);
270                }
271                res
272            }
273            Self::Concat(x) => {
274                let mut res = Some(1u64);
275                for child in x.iter() {
276                    let sub_res = child.how_many(rules, mem, reachable);
277                    res = Self::mul(res, sub_res);
278                }
279                res
280            }
281            Self::Optional(x, i) => {
282                let child = x.how_many(rules, mem, reachable);
283                let child_reachable = child.map_or(true, |x| x > 0);
284                if !child_reachable {
285                    reachable[*i][0] += 1;
286                }
287                1u64.checked_add(child?)
288            }
289            Self::Repetition(x, min_reps, max_reps, i) => {
290                let mut res = Some(0u64);
291                let child = x.how_many(rules, mem, reachable);
292                let child_reachable = child.map_or(true, |x| x > 0);
293                if !child_reachable {
294                    reachable[*i][0] += 1;
295                }
296                match child {
297                    Some(child) if child > 1 => {
298                        for used_rep in *min_reps..=*max_reps {
299                            let (sub_res, overflow) = child.overflowing_pow(used_rep);
300                            res = Self::add(res, (!overflow).then_some(sub_res));
301                            if res.is_none() {
302                                break;
303                            }
304                        }
305                    }
306                    Some(child) if child == 1 => {
307                        // e.g. min,max = 1,3
308                        // "1", "11", "111" -- 3 options
309                        let range = *max_reps as u64 - *min_reps as u64 + 1;
310                        res = Self::add(res, Some(range));
311                    }
312                    _ => (),
313                }
314                res
315            }
316            Self::Group(x) => x.how_many(rules, mem, reachable),
317            Self::Reference(x) => mem[*x],
318            _ => Some(1),
319        }
320    }
321}
322
323fn fmt_w_name<'a>(
324    name: &'static str,
325    x: impl Iterator<Item = &'a Expr>,
326    f: &mut fmt::Formatter<'_>,
327) -> Result<(), fmt::Error> {
328    write!(
329        f,
330        "{}({})",
331        name,
332        x.into_iter()
333            .map(|x| x.to_string())
334            .collect::<Vec<String>>()
335            .join(", ")
336    )
337}
338
339impl fmt::Display for Expr {
340    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
341        match self {
342            Self::Or(x, _) => fmt_w_name("or", x.iter(), f)?,
343            Self::Concat(x) => fmt_w_name("concat", x.iter().rev(), f)?,
344            Self::Optional(x, _) => write!(f, "option({})", x)?,
345            Self::Repetition(x, min, max, _) => write!(f, "repeat({}, {}, {})", x, min, max)?,
346            Self::Reference(index) => write!(f, "{}", index)?,
347            Self::Literal(l) => write!(f, "{:?}", l)?,
348            Self::Regex(_) => write!(f, "regex")?, // TODO: no way to pretty print regex
349            Self::Bytes(b) => write!(f, "{:?}", b)?,
350            Self::Group(x) => write!(f, "({})", x)?,
351            Self::Predefined(p) => write!(f, "{}", p.as_str())?,
352        }
353        Ok(())
354    }
355}
356
357impl Expr {
358    /// Converts the intermediary representation into a "compiled" state machine.
359    /// Also populates `reachable`, see `Grammar.reachable`.
360    ///
361    /// Checks done:
362    /// - parses regex
363    /// - Converts rule references to rule indexes
364    fn try_new(
365        ir_expr: ir::Expr,
366        names: &[String],
367        reachable: &mut Vec<Vec<usize>>,
368    ) -> Result<Self, Error> {
369        Ok(match ir_expr {
370            ir::Expr::Or(x) => {
371                let child = x
372                    .into_iter()
373                    .map(|e| Self::try_new(e, names, reachable))
374                    .collect::<Result<Vec<_>, _>>()?;
375                reachable.push(vec![0; child.len()]);
376                Self::Or(child, reachable.len() - 1)
377            }
378            ir::Expr::Concat(x) => {
379                let mut y = x
380                    .into_iter()
381                    .map(|e| Self::try_new(e, names, reachable))
382                    .collect::<Result<Vec<_>, _>>()?;
383                y.reverse(); // reverse so that `expression` can use a stack
384                Self::Concat(y)
385            }
386            ir::Expr::Optional(x) => {
387                let child = Box::new(Self::try_new(*x, names, reachable)?);
388                reachable.push(vec![0]);
389                Self::Optional(child, reachable.len() - 1)
390            }
391            ir::Expr::Repetition(x, min, max) => {
392                let child = Box::new(Self::try_new(*x, names, reachable)?);
393                reachable.push(vec![0]);
394                Self::Repetition(child, min, max, reachable.len() - 1)
395            }
396            ir::Expr::Group(x) => Self::Group(Box::new(Self::try_new(*x, names, reachable)?)),
397            ir::Expr::Reference(name) => match names.iter().position(|n| *n == name) {
398                Some(i) => Self::Reference(i),
399                None => Self::Predefined(Predefined::from_str(&name).map_err(Error)?),
400            },
401            ir::Expr::Literal(x) => Self::Literal(x),
402            ir::Expr::Regex(r) => {
403                Self::Regex(Regex::compile(&r, 10).map_err(|e| Error(ErrorRepr::Regex(e)))?)
404            }
405            ir::Expr::Bytes(x) => Self::Bytes(x),
406        })
407    }
408}
409
410#[cfg(test)]
411mod tests {
412    use super::*;
413    use rand::{rngs::StdRng, RngCore, SeedableRng};
414    use std::hash::Hash;
415
416    #[test]
417    fn catches_duplicates() {
418        for x in [
419            r#"x: y; x: z;"#,
420            r#"x: y; x: x;"#,
421            r#"ok: x; x: y; y: x; x: x;"#,
422        ] {
423            let result: Error = x.parse::<Grammar>().unwrap_err();
424            assert_eq!(
425                result,
426                Error(ErrorRepr::DuplicateVars(["x".into()].into_iter().collect()))
427            )
428        }
429
430        for x in [
431            r#"x: y; x: z; y: x; y: y;"#,
432            r#"x: x; y: x; y: y; x: y; x: z;"#,
433        ] {
434            let result: Error = x.parse::<Grammar>().unwrap_err();
435            assert_eq!(
436                result,
437                Error(ErrorRepr::DuplicateVars(
438                    ["x".into(), "y".into()].into_iter().collect()
439                ))
440            )
441        }
442    }
443
444    #[test]
445    fn rejects_reserved() {
446        for x in [r#"u16: u16 ;"#, r#"u16: [8, 8]  ;"#] {
447            let result: Error = x.parse::<Grammar>().unwrap_err();
448            assert_eq!(
449                result,
450                Error(ErrorRepr::Reserved(["u16".into()].into_iter().collect()))
451            )
452        }
453
454        for x in [r#"String: u16 ;"#, r#"String: [8, 8]  ;"#] {
455            let result: Error = x.parse::<Grammar>().unwrap_err();
456            assert_eq!(
457                result,
458                Error(ErrorRepr::Reserved(["String".into()].into_iter().collect()))
459            )
460        }
461    }
462
463    #[test]
464    fn rejects_incorrect_ranges() {
465        for x in [
466            r#"expr: "0"{3,2} ;"#,
467            r#"expr: "0"{3,0} ;"#,
468            r#"expr: "0"a3,a} ;"#,
469            r#"expr: "0"{a,b} ;"#,
470            r#"expr: "0"{2,0} ;"#,
471            r#"expr: "0"{0,-1} ;"#,
472            r#"expr: "0"{-1,3} ;"#,
473            r#"expr: "0"{3a} ;"#,
474            r#"expr: "0"{-1} ;"#,
475            r#"expr: "0"{} ;"#,
476            r#"expr: "0"{43250750483253} ;"#,
477        ] {
478            assert!(x.parse::<Grammar>().is_err(), "{}", x);
479        }
480    }
481
482    #[test]
483    fn accepts_correct_ranges() {
484        for x in [
485            r#"expr: "0"{2 ,3} ;"#,
486            r#"expr: "0"{2, 3} ;"#,
487            r#"expr: "0"{ 2,3} ;"#,
488            r#"expr: "0"{2,3 } ;"#,
489            r#"expr: "0"{ 2 , 3  } ;"#,
490            r#"expr: "0"{ 2 } ;"#,
491            r#"expr: "0"{ 2 } ; rule: "{" ; "#,
492            r#"expr: "0"{ 2 } ; rule: "}" ; "#,
493            r#"expr: "0"{ 2 } ; rule: "{ 3 }" ; "#,
494            r#"expr: "0"{ 2 } ; rule: "expr: \"0\"{ 2 } ;" ; "#,
495        ] {
496            let res = x.parse::<Grammar>();
497            assert!(res.is_ok(), "{}\n{:?}", x, res);
498        }
499    }
500
501    fn assert_how_many_matches_generations<T: Visitor + Hash + Eq>(
502        grammar: &Grammar,
503        depth: usize,
504    ) {
505        let mut buf = [0u8; 1024];
506        let num_classes = grammar
507            .how_many(Some(depth))
508            .expect("small number of classes") as usize;
509        assert!(num_classes < 10_000);
510        let mut classes = fxhash::FxHashSet::<T>::default();
511        classes.try_reserve(num_classes).unwrap();
512
513        let mut rng = StdRng::seed_from_u64(42);
514
515        // pick `num_iterations` to reduce prob of test being accurate
516        // note, not all classes have the same probability of being picked!
517        // note, 400 is tuned
518        let num_iterations = 400 * num_classes;
519
520        for _ in 0..num_iterations {
521            rng.fill_bytes(&mut buf);
522            let mut u = Unstructured::new(&buf);
523            if let Ok(class) = grammar.expression::<T>(&mut u, Some(depth)) {
524                classes.insert(class);
525            }
526        }
527        assert_eq!(classes.len(), num_classes);
528    }
529
530    #[test]
531    fn how_many_or() {
532        let grammar: Grammar = r#"num : "1" | "2" ;"#.parse().unwrap();
533
534        for max in 1..=10 {
535            assert_eq!(grammar.how_many(Some(max)), Some(2));
536        }
537        assert_eq!(grammar.how_many(None), Some(2));
538        assert_how_many_matches_generations::<u64>(&grammar, 1);
539    }
540
541    #[test]
542    fn how_many_concat() {
543        let grammar: Grammar = r#"
544            expr : "1" | "2" | num ;
545            num  : ("3" | "4") "." ("5" | "6") ;
546        "#
547        .parse()
548        .unwrap();
549
550        // only a depth of 1 is allowed, so "1" or "2"
551        assert_eq!(grammar.how_many(Some(1)), Some(2));
552        assert_how_many_matches_generations::<u64>(&grammar, 1);
553
554        // 2 + (2 * 2)
555        // 2: "1" or "2"
556        // 2 * 2: ("3" or "4") * ("5" or "6")
557        assert_eq!(grammar.how_many(Some(2)), Some(6));
558        assert_how_many_matches_generations::<u64>(&grammar, 2);
559
560        let grammar: Grammar = r#"
561            expr : "1" | "2" | num? ;
562            num  : ("3" | "4") "." ("5" | "6") ;
563        "#
564        .parse()
565        .unwrap();
566        assert_eq!(grammar.how_many(Some(2)), Some(7));
567        assert_how_many_matches_generations::<u64>(&grammar, 2);
568        assert_eq!(grammar.how_many(None), Some(7));
569    }
570
571    #[test]
572    fn how_many_static_reps() {
573        let grammar: Grammar = r#"
574            expr : num{6} ;
575            num  : "0" | "1" ;
576        "#
577        .parse()
578        .unwrap();
579
580        // 2 choices, exactly 6 times... 2^6 = 64
581        assert_eq!(grammar.how_many(Some(2)), Some(64));
582        assert_eq!(grammar.how_many(None), Some(64));
583        assert_how_many_matches_generations::<u64>(&grammar, 2);
584    }
585
586    #[test]
587    fn how_many_bounded_reps() {
588        let grammar: Grammar = r#"
589            expr : num{0,6} ;
590            num  : "0" | "1" ;
591        "#
592        .parse()
593        .unwrap();
594
595        // 0 reps: 1
596        // 1 reps: 2 ("0" or "1")
597        // 2 reps: 2^2
598        // 3 reps: 2^3
599        // ...
600        assert_eq!(grammar.how_many(Some(2)), Some(127));
601        assert_eq!(grammar.how_many(None), Some(127));
602        assert_how_many_matches_generations::<u64>(&grammar, 2);
603    }
604
605    #[test]
606    fn how_many_inf_reps() {
607        let grammar: Grammar = r#"
608            expr : num* ;
609            num  : "0" | "1" ;
610        "#
611        .parse()
612        .unwrap();
613
614        assert_eq!(grammar.how_many(Some(2)), None);
615        assert_eq!(grammar.how_many(None), None);
616
617        let grammar: Grammar = r#"
618            expr : num* ;
619            num  : "0" ;
620        "#
621        .parse()
622        .unwrap();
623
624        assert_eq!(
625            grammar.how_many(Some(2)),
626            Some(crate::MAX_REPEAT as u64 + 1)
627        );
628        assert_eq!(grammar.how_many(None), Some(crate::MAX_REPEAT as u64 + 1));
629
630        let grammar: Grammar = r#"
631            expr : num* ;
632            num  : "0" | r"[a-z]{2,3}" ;
633        "#
634        .parse()
635        .unwrap();
636
637        assert_eq!(grammar.how_many(Some(2)), None);
638        assert_eq!(grammar.how_many(None), None);
639    }
640
641    #[test]
642    fn how_many_choice() {
643        let grammar: Grammar = r#"expr : "1"? ;"#.parse().unwrap();
644        assert_eq!(grammar.how_many(Some(2)), Some(2));
645        assert_how_many_matches_generations::<u64>(&grammar, 2);
646
647        let grammar: Grammar = r#"expr : ("1" | "2" )? ;"#.parse().unwrap();
648        assert_eq!(grammar.how_many(Some(2)), Some(3));
649        assert_how_many_matches_generations::<u64>(&grammar, 2);
650
651        // "1", "2", "", or "" (outer)
652        let grammar: Grammar = r#"expr : ("1" | "2"? )? ;"#.parse().unwrap();
653        assert_eq!(grammar.how_many(Some(2)), Some(4));
654        assert_how_many_matches_generations::<u64>(&grammar, 2);
655        assert_eq!(grammar.how_many(None), Some(4));
656    }
657
658    #[test]
659    fn how_many_simpler_math() {
660        let grammar: Grammar = r#"
661            expr   : u16 | expr symbol expr ;
662            symbol : "+" | "-" | "*" | "/" ;
663        "#
664        .parse()
665        .unwrap();
666
667        assert_eq!(grammar.how_many(Some(1)), Some(1));
668        assert_how_many_matches_generations::<u64>(&grammar, 1);
669
670        // a singular number
671        // two numbers with 4 possible operators
672        // 1 + 4
673        assert_eq!(grammar.how_many(Some(2)), Some(5));
674        assert_how_many_matches_generations::<u64>(&grammar, 2);
675
676        // combinations are:
677        // 1 singular number
678        // + 5 possible expressions (from above) on the left
679        // * 4 possible symbols
680        // * 5 possible expressions on the right
681        assert_eq!(grammar.how_many(Some(3)), Some(101));
682        assert_how_many_matches_generations::<u64>(&grammar, 3);
683        assert_eq!(grammar.how_many(None), None);
684    }
685
686    #[test]
687    fn how_many_with_prefined() {
688        let grammar: Grammar = r#"
689            one : "1" | "2" | (u16 | String)? | u16? | (u16 | String){6} ;
690        "#
691        .parse()
692        .unwrap();
693        assert_how_many_matches_generations::<u64>(&grammar, 2);
694    }
695
696    #[test]
697    fn how_many_simple_math() {
698        let grammar: Grammar = r#"
699            expr   : num | expr symbol expr ;
700            symbol : "+" | "-" | "*" | "/" ;
701            num    : nested | "1" | "2" ;
702            nested : u16;
703        "#
704        .parse()
705        .unwrap();
706
707        assert_eq!(grammar.how_many(Some(1)), Some(0));
708        assert_how_many_matches_generations::<u64>(&grammar, 1);
709        // only "1" and "2" are reachable, `nested` is a reference
710        // and needs one more depth!
711        assert_eq!(grammar.how_many(Some(2)), Some(2));
712        assert_how_many_matches_generations::<u64>(&grammar, 2);
713
714        // 3 + 2 * 4 * 2
715        assert_eq!(grammar.how_many(Some(3)), Some(19));
716        assert_how_many_matches_generations::<u64>(&grammar, 3);
717
718        // This grammar is infinitely recursive
719        assert_eq!(grammar.how_many(None), None);
720    }
721
722    #[test]
723    fn how_many_nesting() {
724        let grammar: Grammar = r#"
725            one    : "1" | two ;
726            two    : "2" | three ;
727            three  : "3" ;
728        "#
729        .parse()
730        .unwrap();
731        assert_eq!(grammar.how_many(Some(10)), Some(3));
732        for depth in 0..=3 {
733            assert_how_many_matches_generations::<u64>(&grammar, depth);
734            assert_how_many_matches_generations::<String>(&grammar, depth);
735        }
736    }
737
738    fn success_count(grammar: &Grammar, depth: usize, total: usize) -> usize {
739        let mut buf = [0u8; 1024];
740        let mut rng = StdRng::seed_from_u64(42);
741
742        let mut valid = 0;
743        for _ in 0..total {
744            rng.fill_bytes(&mut buf);
745            let mut u = Unstructured::new(&buf);
746            valid += grammar.expression::<u64>(&mut u, Some(depth)).is_ok() as usize;
747        }
748        valid
749    }
750
751    #[test]
752    fn avoid_long_expr_opt_ref() {
753        let grammar: Grammar = r#"
754            one : two?;
755            two : "2" ;
756        "#
757        .parse()
758        .unwrap();
759
760        for depth in 1..=4 {
761            let valid = success_count(&grammar, depth, 100);
762            assert_eq!(valid, 100);
763            assert_how_many_matches_generations::<u64>(&grammar, depth);
764            assert_how_many_matches_generations::<String>(&grammar, depth);
765        }
766    }
767
768    #[test]
769    fn avoid_long_expr_opt_hardcoded() {
770        let grammar: Grammar = r#"
771            one : "2"?;
772        "#
773        .parse()
774        .unwrap();
775
776        for depth in 1..=4 {
777            let valid = success_count(&grammar, depth, 100);
778            assert_eq!(valid, 100);
779            assert_how_many_matches_generations::<u64>(&grammar, depth);
780            assert_how_many_matches_generations::<String>(&grammar, depth);
781        }
782    }
783
784    #[test]
785    fn avoid_long_expr_opt_hardcoded_paren() {
786        let grammar: Grammar = r#"
787            one : ("2")?;
788        "#
789        .parse()
790        .unwrap();
791
792        for depth in 1..=4 {
793            let valid = success_count(&grammar, depth, 100);
794            assert_eq!(valid, 100);
795            assert_how_many_matches_generations::<u64>(&grammar, depth);
796            assert_how_many_matches_generations::<String>(&grammar, depth);
797        }
798    }
799
800    #[test]
801    fn avoid_long_expr_or() {
802        let grammar: Grammar = r#"
803            one : "1" | two ;
804            two : "2" | three ;
805            three : "3" ;
806        "#
807        .parse()
808        .unwrap();
809
810        for depth in 1..=4 {
811            let valid = success_count(&grammar, depth, 100);
812            assert_eq!(valid, 100);
813            assert_how_many_matches_generations::<u64>(&grammar, depth);
814            assert_how_many_matches_generations::<String>(&grammar, depth);
815        }
816    }
817
818    #[test]
819    fn avoid_long_expr_rep_0_or_more() {
820        let grammar: Grammar = r#"
821            one : "1" | two{4} ;
822            two : "2";
823        "#
824        .parse()
825        .unwrap();
826
827        assert_eq!(grammar.how_many(Some(1)), Some(1));
828
829        for depth in 1..=4 {
830            let valid = success_count(&grammar, depth, 100);
831            assert_eq!(valid, 100);
832            assert_how_many_matches_generations::<u64>(&grammar, depth);
833            assert_how_many_matches_generations::<String>(&grammar, depth);
834        }
835    }
836
837    #[test]
838    fn avoid_long_expr_rep_1_or_more() {
839        let grammar: Grammar = r#"
840            one : "1" | two{1,3} ;
841            two : "2";
842        "#
843        .parse()
844        .unwrap();
845
846        for depth in 1..=4 {
847            let valid = success_count(&grammar, depth, 100);
848            assert_eq!(valid, 100);
849            assert_how_many_matches_generations::<u64>(&grammar, depth);
850            assert_how_many_matches_generations::<String>(&grammar, depth);
851        }
852    }
853
854    #[test]
855    fn avoid_impossible_deep_ref() {
856        let grammar: Grammar = r#"
857            one : two ;
858            two : three ;
859            three : "3";
860        "#
861        .parse()
862        .unwrap();
863
864        for depth in 0..=2 {
865            let valid = success_count(&grammar, depth, 100);
866            assert_eq!(valid, 0);
867            assert_how_many_matches_generations::<u64>(&grammar, depth);
868            assert_how_many_matches_generations::<String>(&grammar, depth);
869        }
870        let valid = success_count(&grammar, 3, 100);
871        assert_eq!(valid, 100);
872        assert_how_many_matches_generations::<u64>(&grammar, 3);
873        assert_how_many_matches_generations::<String>(&grammar, 3);
874    }
875
876    #[test]
877    fn avoid_example() {
878        let grammar: Grammar = r#"
879            one : "1" | two three ;
880            two : "2" "2"? ;
881            three : three_inner ;
882            three_inner : "3"   ;
883        "#
884        .parse()
885        .unwrap();
886
887        for depth in 1..=8 {
888            let valid = success_count(&grammar, depth, 100);
889            assert_eq!(valid, 100);
890            assert_how_many_matches_generations::<u64>(&grammar, depth);
891        }
892    }
893
894    #[test]
895    fn avoid_mixed_branches() {
896        let grammar: Grammar = r#"
897            expr : "qwerty"{2,4} | "4" | (two)? ;
898            two : "5"{3} | three | three four? ;
899            three : two | three ;
900            four  : "4" ;
901        "#
902        .parse()
903        .unwrap();
904
905        for depth in 1..=6 {
906            assert_how_many_matches_generations::<u64>(&grammar, depth);
907        }
908
909        for depth in 1..=30 {
910            let valid = success_count(&grammar, depth, 10);
911            assert_eq!(valid, 10);
912        }
913    }
914}