spindle_lib/
grammar.rs

1use crate::error::ErrorRepr;
2use crate::Error;
3use crate::{ir, regex::Regex, reserved::*, Visitor};
4
5use arbitrary::{Arbitrary, Unstructured};
6use std::{collections::HashSet, fmt, str::FromStr};
7
8/// A state machine that produces `Arbitrary` matching expressions or byte sequences from [`Unstructured`](https://docs.rs/arbitrary/latest/arbitrary/struct.Unstructured.html).
9///
10/// # Implementation
11/// ## Construction
12/// `Grammar` is constructed using `from_str` of a string that specifies the syntax of expressions:
13/// - A peg parser converts the string into an "intermediary representation" AST (in ir.rs).
14/// - The IR is validated for duplicate and conflicting rule definitions
15/// - The IR is converted to a state machine in which regex is parsed and rule definitions are indexed.
16///
17/// ## Expression Generation
18/// `Unstructured` is used to generate `arbitrary` choices and loops to traverse the state machine.
19/// When a "leaf" state is reached, e.g. a pre-defined rule, regex, or string literal, `Unstructured`
20/// is used to write arbitrary data (except for string literals) to an output buffer.
21#[derive(Debug)]
22pub struct Grammar {
23    rules: Vec<Expr>,
24
25    /// `rule_names[i]` = name the ith rule definition, as defined by the user in `FromStr`.
26    rule_names: Box<[Box<str>]>,
27
28    /// `how_many[i]` == number of possible traversals at the ith depth
29    /// i.e. memoized results of `how_many` calculated on construction.
30    how_many: Vec<Option<u64>>,
31
32    /// `reachable[i][j]` == first depth that is reachable by the `i`th branch `Expr`'s `j`th branch.
33    /// Branch `Expr`s are `Expr::Optional`, `Expr::Or`, `Expr::Repetition`.
34    /// `reachable` is used in `expression`: branches that are not reachable at the current depth are not explored.
35    reachable: Vec<Vec<usize>>,
36}
37
38impl Grammar {
39    /// Returns a resulting `Visitor` after an arbitirary state machine traversal.
40    ///
41    /// The state machine traversal starts at the first rule in the grammar.
42    ///
43    /// # Parameters
44    /// `max_depth` is nesting limit of rule referencing during the traversal.
45    /// For example, the below grammar generates numbers less than or equal to the `max_depth` parameter.
46    /// ```text
47    /// one   : "1" | two ;
48    /// two   : "2" | three ;
49    /// three : "3" ;
50    /// ```
51    pub fn expression<V: Visitor>(
52        &self,
53        u: &mut Unstructured<'_>,
54        max_depth: Option<usize>,
55    ) -> arbitrary::Result<V> {
56        let mut visitor = V::new();
57        let mut to_write = vec![(&self.rules[0], max_depth.unwrap_or(usize::MAX))]; // always start at first rule
58
59        while let Some((expr, depth)) = to_write.pop() {
60            if depth == 0 {
61                return Err(arbitrary::Error::IncorrectFormat);
62            }
63
64            match expr {
65                Expr::Or(v, i) => {
66                    // TODO: might be better way to choose a random item from a filtered list
67                    // will explore ways to avoid the vec allocation when we have benchmarks.
68                    let avail: Vec<_> = (0..v.len())
69                        .filter(|j| self.reachable(depth, *i, *j))
70                        .collect();
71                    let arb_index = u.choose_iter(avail.iter())?;
72                    to_write.push((&v[*arb_index], depth));
73                    visitor.visit_or(*arb_index);
74                }
75                Expr::Concat(v) => {
76                    to_write.extend(v.iter().map(|x| (x, depth)));
77                    visitor.visit_concat();
78                }
79                Expr::Optional(x, i) => {
80                    let b = self.reachable(depth, *i, 0) && bool::arbitrary(u)?;
81                    if b {
82                        to_write.push((x, depth));
83                    }
84                    visitor.visit_optional(b);
85                }
86                Expr::Repetition(x, min_rep, max_rep, i) => {
87                    let mut reps = 0;
88                    if self.reachable(depth, *i, 0) {
89                        u.arbitrary_loop(Some(*min_rep), Some(*max_rep), |_| {
90                            to_write.push((x, depth));
91                            reps += 1;
92                            Ok(std::ops::ControlFlow::Continue(()))
93                        })?;
94                    }
95                    visitor.visit_repetition(reps);
96                }
97                Expr::Reference(index) => {
98                    to_write.push((&self.rules[*index], depth - 1));
99                    visitor.visit_reference(&self.rule_names[*index], *index);
100                }
101                Expr::Literal(s) => visitor.visit_literal(s.as_str()),
102                Expr::Bytes(b) => visitor.visit_bytes(b),
103                Expr::Regex(regex) => regex.visit(&mut visitor, u)?,
104                Expr::Group(x) => to_write.push((x, depth)),
105                Expr::Predefined(v) => v.visit(&mut visitor, u)?,
106            }
107        }
108        Ok(visitor)
109    }
110
111    /// Returns `true` if the `i`th branch `Expr`s `j`th branch is reachable at `depth`.
112    fn reachable(&self, depth: usize, i: usize, j: usize) -> bool {
113        depth > self.reachable[i][j]
114    }
115
116    /// Returns the number of possible state machine traversals for
117    /// this state machine or `None` if the result exceeds `u64::MAX`.
118    ///
119    /// In other words, `grammar.how_many(depth)` is the number of unique values possible from `grammar.expression::<u64>(u, depth)` (barring hash collisions).
120    ///
121    /// # Parameters
122    /// `max_depth` is the maximum nested references that the traversal is allowed.
123    /// A `max_depth` of 0 will always return 0.
124    ///
125    /// # Usage
126    /// Provides a rough possible number of equivalence classes of the grammar,
127    /// which is useful for estimating overall coverage as the fuzzer discovers more
128    /// classes over time. Equivalence classes summarize the space of all inputs based on
129    /// high level strcuture, not the actual data of terminal nodes.
130    /// A large `how_many` means achieving full coverage may challenge your time constraints.
131    /// A small `how_many` signals that you could increase the complexity or granularity of your grammar.
132    ///
133    /// # Limitations
134    /// 1. No traversals are counted inside of Regex and pre-defined (e.g. String) rules
135    /// i.e. are counted as 1 possible traversal even though many regex expressions or Strings
136    /// are possible.
137    ///
138    /// 2. The result is not aware of duplicate outputs, e.g.
139    /// ```text
140    /// expr : "foo" | "foo" ;
141    /// ```
142    /// counts as 2 traversals, even though every output is "foo".
143    pub fn how_many(&self, max_depth: Option<usize>) -> Option<u64> {
144        let target_depth = std::cmp::min(max_depth.unwrap_or(usize::MAX), self.how_many.len() - 1);
145        self.how_many[target_depth]
146    }
147}
148
149/// Pretty prints the state machine.
150///
151/// It's helpful to check if the compiled state machine matches
152/// what is expected from the the un-parsed grammar
153/// (the printed rules are more verbose and order of operations is clearer)
154impl fmt::Display for Grammar {
155    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
156        for (i, rule) in self.rules.iter().enumerate() {
157            writeln!(f, "{}: {}", i, rule)?;
158        }
159        Ok(())
160    }
161}
162
163impl FromStr for Grammar {
164    type Err = Error;
165
166    fn from_str(s: &str) -> Result<Self, Self::Err> {
167        let parsed_ir = ir::bnf::expr(s).map_err(|e| Error(ErrorRepr::Grammar(e)))?;
168        Self::try_from(parsed_ir)
169    }
170}
171
172impl TryFrom<Vec<(String, ir::Expr)>> for Grammar {
173    type Error = Error;
174
175    fn try_from(value: Vec<(String, ir::Expr)>) -> Result<Self, Self::Error> {
176        let (names, ir_exprs): (Vec<String>, Vec<ir::Expr>) = value.into_iter().unzip();
177
178        if let Some(dups) = find_duplicates(&names) {
179            return Err(Error(ErrorRepr::DuplicateVars(dups)));
180        }
181
182        if let Some(res) = filter_reserved_keywords(&names) {
183            return Err(Error(ErrorRepr::Reserved(res)));
184        }
185
186        let mut reachable: Vec<Vec<usize>> = Vec::new();
187        let rules = ir_exprs
188            .into_iter()
189            .map(|ir_expr| Expr::try_new(ir_expr, &names, &mut reachable))
190            .collect::<Result<Vec<Expr>, _>>()?;
191
192        // Use a bottom up approach for calculating the number of traversals:
193        // 1. all rules have 0 traversals at depth 0.
194        // 2. prev[i] == `how_many` for the ith rule at the previously calculated depth.
195        // 3. dp[i] == `how_many` for the ith rule the current depth which depends on `prev`.
196        // 4. finally return `how_many` for the first (start) rule.
197        let mut how_many = vec![Some(0u64)];
198        let mut prev = vec![Some(0u64); rules.len()];
199        loop {
200            let dp: Vec<_> = rules
201                .iter()
202                .map(|r| r.how_many(&rules, &prev, &mut reachable))
203                .collect();
204
205            how_many.push(dp[0]);
206
207            // discovered all possible or already exceeded `u64::MAX`
208            if dp == prev || dp[0] == None {
209                break;
210            }
211
212            prev = dp;
213        }
214        assert_eq!(names.len(), rules.len());
215        let rule_names = names
216            .into_iter()
217            .map(|x| x.into())
218            .collect::<Vec<Box<str>>>()
219            .into();
220        Ok(Self {
221            rules,
222            rule_names,
223            reachable,
224            how_many,
225        })
226    }
227}
228
229fn find_duplicates(names: &[String]) -> Option<HashSet<String>> {
230    let mut set: HashSet<String> = names.iter().cloned().collect();
231    let dups: HashSet<String> = names.iter().filter(|&n| !set.remove(n)).cloned().collect();
232    (!dups.is_empty()).then_some(dups)
233}
234
235/// Branch `Expr`s (`Optional`, `Or`, `Repetition`) contain a `usize`
236/// which is index of the branch in the state machine (pre-ordered).
237#[derive(Debug)]
238enum Expr {
239    Or(Vec<Expr>, usize),
240    Concat(Vec<Expr>),
241    Optional(Box<Expr>, usize),
242    Repetition(Box<Expr>, u32, u32, usize),
243    Reference(usize),
244    Literal(String),
245    Regex(Regex),
246    Bytes(Vec<u8>),
247    Group(Box<Expr>),
248    Predefined(Predefined),
249}
250
251impl Expr {
252    fn add(x: Option<u64>, y: Option<u64>) -> Option<u64> {
253        x?.checked_add(y?)
254    }
255
256    fn mul(x: Option<u64>, y: Option<u64>) -> Option<u64> {
257        x?.checked_mul(y?)
258    }
259
260    /// See [`Grammar::how_many`].
261    ///
262    /// `mem` is previously calculated `how_many` for `depth - 1` for each `Reference::Expr`/rule in `rules`.
263    /// `reachable` is semi-built `Grammar.reachable`. If the expr is not reachable, `reachable[i][j]` is incremented to
264    /// finally indicate the first depth in which the jth branch in ith rule is reachable.
265    fn how_many(
266        &self,
267        rules: &[Expr],
268        mem: &[Option<u64>],
269        reachable: &mut [Vec<usize>],
270    ) -> Option<u64> {
271        match self {
272            Self::Or(x, i) => {
273                let mut res = Some(0u64);
274                for (j, child) in x.iter().enumerate() {
275                    let sub_res = child.how_many(rules, mem, reachable);
276                    let child_reachable = sub_res.map_or(true, |x| x > 0);
277                    if !child_reachable {
278                        reachable[*i][j] += 1;
279                    }
280                    res = Self::add(res, sub_res);
281                }
282                res
283            }
284            Self::Concat(x) => {
285                let mut res = Some(1u64);
286                for child in x.iter() {
287                    let sub_res = child.how_many(rules, mem, reachable);
288                    res = Self::mul(res, sub_res);
289                }
290                res
291            }
292            Self::Optional(x, i) => {
293                let child = x.how_many(rules, mem, reachable);
294                let child_reachable = child.map_or(true, |x| x > 0);
295                if !child_reachable {
296                    reachable[*i][0] += 1;
297                }
298                1u64.checked_add(child?)
299            }
300            Self::Repetition(x, min_reps, max_reps, i) => {
301                let mut res = Some(0u64);
302                let child = x.how_many(rules, mem, reachable);
303                let child_reachable = child.map_or(true, |x| x > 0);
304                if !child_reachable {
305                    reachable[*i][0] += 1;
306                }
307                match child {
308                    Some(child) if child > 1 => {
309                        for used_rep in *min_reps..=*max_reps {
310                            let (sub_res, overflow) = child.overflowing_pow(used_rep);
311                            res = Self::add(res, (!overflow).then_some(sub_res));
312                            if res.is_none() {
313                                break;
314                            }
315                        }
316                    }
317                    Some(child) if child == 1 => {
318                        // e.g. min,max = 1,3
319                        // "1", "11", "111" -- 3 options
320                        let range = *max_reps as u64 - *min_reps as u64 + 1;
321                        res = Self::add(res, Some(range));
322                    }
323                    _ => (),
324                }
325                res
326            }
327            Self::Group(x) => x.how_many(rules, mem, reachable),
328            Self::Reference(x) => mem[*x],
329            _ => Some(1),
330        }
331    }
332}
333
334fn fmt_w_name<'a>(
335    name: &'static str,
336    x: impl Iterator<Item = &'a Expr>,
337    f: &mut fmt::Formatter<'_>,
338) -> Result<(), fmt::Error> {
339    write!(
340        f,
341        "{}({})",
342        name,
343        x.into_iter()
344            .map(|x| x.to_string())
345            .collect::<Vec<String>>()
346            .join(", ")
347    )
348}
349
350impl fmt::Display for Expr {
351    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
352        match self {
353            Self::Or(x, _) => fmt_w_name("or", x.iter(), f)?,
354            Self::Concat(x) => fmt_w_name("concat", x.iter().rev(), f)?,
355            Self::Optional(x, _) => write!(f, "option({})", x)?,
356            Self::Repetition(x, min, max, _) => write!(f, "repeat({}, {}, {})", x, min, max)?,
357            Self::Reference(index) => write!(f, "{}", index)?,
358            Self::Literal(l) => write!(f, "{:?}", l)?,
359            Self::Regex(_) => write!(f, "regex")?, // TODO: no way to pretty print regex
360            Self::Bytes(b) => write!(f, "{:?}", b)?,
361            Self::Group(x) => write!(f, "({})", x)?,
362            Self::Predefined(p) => write!(f, "{}", p.as_str())?,
363        }
364        Ok(())
365    }
366}
367
368impl Expr {
369    /// Converts the intermediary representation into a "compiled" state machine.
370    /// Also populates `reachable`, see `Grammar.reachable`.
371    ///
372    /// Checks done:
373    /// - parses regex
374    /// - Converts rule references to rule indexes
375    fn try_new(
376        ir_expr: ir::Expr,
377        names: &[String],
378        reachable: &mut Vec<Vec<usize>>,
379    ) -> Result<Self, Error> {
380        Ok(match ir_expr {
381            ir::Expr::Or(x) => {
382                let child = x
383                    .into_iter()
384                    .map(|e| Self::try_new(e, names, reachable))
385                    .collect::<Result<Vec<_>, _>>()?;
386                reachable.push(vec![0; child.len()]);
387                Self::Or(child, reachable.len() - 1)
388            }
389            ir::Expr::Concat(x) => {
390                let mut y = x
391                    .into_iter()
392                    .map(|e| Self::try_new(e, names, reachable))
393                    .collect::<Result<Vec<_>, _>>()?;
394                y.reverse(); // reverse so that `expression` can use a stack
395                Self::Concat(y)
396            }
397            ir::Expr::Optional(x) => {
398                let child = Box::new(Self::try_new(*x, names, reachable)?);
399                reachable.push(vec![0]);
400                Self::Optional(child, reachable.len() - 1)
401            }
402            ir::Expr::Repetition(x, min, max) => {
403                let child = Box::new(Self::try_new(*x, names, reachable)?);
404                reachable.push(vec![0]);
405                Self::Repetition(child, min, max, reachable.len() - 1)
406            }
407            ir::Expr::Group(x) => Self::Group(Box::new(Self::try_new(*x, names, reachable)?)),
408            ir::Expr::Reference(name) => match names.iter().position(|n| *n == name) {
409                Some(i) => Self::Reference(i),
410                None => Self::Predefined(Predefined::from_str(&name).map_err(Error)?),
411            },
412            ir::Expr::Literal(x) => Self::Literal(x),
413            ir::Expr::Regex(r) => {
414                Self::Regex(Regex::compile(&r, 10).map_err(|e| Error(ErrorRepr::Regex(e)))?)
415            }
416            ir::Expr::Bytes(x) => Self::Bytes(x),
417        })
418    }
419}
420
421#[cfg(test)]
422mod tests {
423    use super::*;
424    use rand::{rngs::StdRng, RngCore, SeedableRng};
425    use std::hash::Hash;
426
427    #[test]
428    fn catches_duplicates() {
429        for x in [
430            r#"x: y; x: z;"#,
431            r#"x: y; x: x;"#,
432            r#"ok: x; x: y; y: x; x: x;"#,
433        ] {
434            let result: Error = x.parse::<Grammar>().unwrap_err();
435            assert_eq!(
436                result,
437                Error(ErrorRepr::DuplicateVars(["x".into()].into_iter().collect()))
438            )
439        }
440
441        for x in [
442            r#"x: y; x: z; y: x; y: y;"#,
443            r#"x: x; y: x; y: y; x: y; x: z;"#,
444        ] {
445            let result: Error = x.parse::<Grammar>().unwrap_err();
446            assert_eq!(
447                result,
448                Error(ErrorRepr::DuplicateVars(
449                    ["x".into(), "y".into()].into_iter().collect()
450                ))
451            )
452        }
453    }
454
455    #[test]
456    fn rejects_reserved() {
457        for x in [r#"u16: u16 ;"#, r#"u16: [8, 8]  ;"#] {
458            let result: Error = x.parse::<Grammar>().unwrap_err();
459            assert_eq!(
460                result,
461                Error(ErrorRepr::Reserved(["u16".into()].into_iter().collect()))
462            )
463        }
464
465        for x in [r#"String: u16 ;"#, r#"String: [8, 8]  ;"#] {
466            let result: Error = x.parse::<Grammar>().unwrap_err();
467            assert_eq!(
468                result,
469                Error(ErrorRepr::Reserved(["String".into()].into_iter().collect()))
470            )
471        }
472    }
473
474    #[test]
475    fn rejects_incorrect_ranges() {
476        for x in [
477            r#"expr: "0"{3,2} ;"#,
478            r#"expr: "0"{3,0} ;"#,
479            r#"expr: "0"a3,a} ;"#,
480            r#"expr: "0"{a,b} ;"#,
481            r#"expr: "0"{2,0} ;"#,
482            r#"expr: "0"{0,-1} ;"#,
483            r#"expr: "0"{-1,3} ;"#,
484            r#"expr: "0"{3a} ;"#,
485            r#"expr: "0"{-1} ;"#,
486            r#"expr: "0"{} ;"#,
487            r#"expr: "0"{43250750483253} ;"#,
488        ] {
489            assert!(x.parse::<Grammar>().is_err(), "{}", x);
490        }
491    }
492
493    #[test]
494    fn accepts_correct_ranges() {
495        for x in [
496            r#"expr: "0"{2 ,3} ;"#,
497            r#"expr: "0"{2, 3} ;"#,
498            r#"expr: "0"{ 2,3} ;"#,
499            r#"expr: "0"{2,3 } ;"#,
500            r#"expr: "0"{ 2 , 3  } ;"#,
501            r#"expr: "0"{ 2 } ;"#,
502            r#"expr: "0"{ 2 } ; rule: "{" ; "#,
503            r#"expr: "0"{ 2 } ; rule: "}" ; "#,
504            r#"expr: "0"{ 2 } ; rule: "{ 3 }" ; "#,
505            r#"expr: "0"{ 2 } ; rule: "expr: \"0\"{ 2 } ;" ; "#,
506        ] {
507            let res = x.parse::<Grammar>();
508            assert!(res.is_ok(), "{}\n{:?}", x, res);
509        }
510    }
511
512    fn assert_how_many_matches_generations<T: Visitor + Hash + Eq>(
513        grammar: &Grammar,
514        depth: usize,
515    ) {
516        let mut buf = [0u8; 1024];
517        let num_classes = grammar
518            .how_many(Some(depth))
519            .expect("small number of classes") as usize;
520        assert!(num_classes < 10_000);
521        let mut classes = fxhash::FxHashSet::<T>::default();
522        classes.try_reserve(num_classes).unwrap();
523
524        let mut rng = StdRng::seed_from_u64(42);
525
526        // pick `num_iterations` to reduce prob of test being accurate
527        // note, not all classes have the same probability of being picked!
528        // note, 400 is tuned
529        let num_iterations = 400 * num_classes;
530
531        for _ in 0..num_iterations {
532            rng.fill_bytes(&mut buf);
533            let mut u = Unstructured::new(&buf);
534            if let Ok(class) = grammar.expression::<T>(&mut u, Some(depth)) {
535                classes.insert(class);
536            }
537        }
538        assert_eq!(classes.len(), num_classes);
539    }
540
541    #[test]
542    fn how_many_or() {
543        let grammar: Grammar = r#"num : "1" | "2" ;"#.parse().unwrap();
544
545        for max in 1..=10 {
546            assert_eq!(grammar.how_many(Some(max)), Some(2));
547        }
548        assert_eq!(grammar.how_many(None), Some(2));
549        assert_how_many_matches_generations::<u64>(&grammar, 1);
550    }
551
552    #[test]
553    fn how_many_concat() {
554        let grammar: Grammar = r#"
555            expr : "1" | "2" | num ;
556            num  : ("3" | "4") "." ("5" | "6") ;
557        "#
558        .parse()
559        .unwrap();
560
561        // only a depth of 1 is allowed, so "1" or "2"
562        assert_eq!(grammar.how_many(Some(1)), Some(2));
563        assert_how_many_matches_generations::<u64>(&grammar, 1);
564
565        // 2 + (2 * 2)
566        // 2: "1" or "2"
567        // 2 * 2: ("3" or "4") * ("5" or "6")
568        assert_eq!(grammar.how_many(Some(2)), Some(6));
569        assert_how_many_matches_generations::<u64>(&grammar, 2);
570
571        let grammar: Grammar = r#"
572            expr : "1" | "2" | num? ;
573            num  : ("3" | "4") "." ("5" | "6") ;
574        "#
575        .parse()
576        .unwrap();
577        assert_eq!(grammar.how_many(Some(2)), Some(7));
578        assert_how_many_matches_generations::<u64>(&grammar, 2);
579        assert_eq!(grammar.how_many(None), Some(7));
580    }
581
582    #[test]
583    fn how_many_static_reps() {
584        let grammar: Grammar = r#"
585            expr : num{6} ;
586            num  : "0" | "1" ;
587        "#
588        .parse()
589        .unwrap();
590
591        // 2 choices, exactly 6 times... 2^6 = 64
592        assert_eq!(grammar.how_many(Some(2)), Some(64));
593        assert_eq!(grammar.how_many(None), Some(64));
594        assert_how_many_matches_generations::<u64>(&grammar, 2);
595    }
596
597    #[test]
598    fn how_many_bounded_reps() {
599        let grammar: Grammar = r#"
600            expr : num{0,6} ;
601            num  : "0" | "1" ;
602        "#
603        .parse()
604        .unwrap();
605
606        // 0 reps: 1
607        // 1 reps: 2 ("0" or "1")
608        // 2 reps: 2^2
609        // 3 reps: 2^3
610        // ...
611        assert_eq!(grammar.how_many(Some(2)), Some(127));
612        assert_eq!(grammar.how_many(None), Some(127));
613        assert_how_many_matches_generations::<u64>(&grammar, 2);
614    }
615
616    #[test]
617    fn how_many_inf_reps() {
618        let grammar: Grammar = r#"
619            expr : num* ;
620            num  : "0" | "1" ;
621        "#
622        .parse()
623        .unwrap();
624
625        assert_eq!(grammar.how_many(Some(2)), None);
626        assert_eq!(grammar.how_many(None), None);
627
628        let grammar: Grammar = r#"
629            expr : num* ;
630            num  : "0" ;
631        "#
632        .parse()
633        .unwrap();
634
635        assert_eq!(
636            grammar.how_many(Some(2)),
637            Some(crate::MAX_REPEAT as u64 + 1)
638        );
639        assert_eq!(grammar.how_many(None), Some(crate::MAX_REPEAT as u64 + 1));
640
641        let grammar: Grammar = r#"
642            expr : num* ;
643            num  : "0" | r"[a-z]{2,3}" ;
644        "#
645        .parse()
646        .unwrap();
647
648        assert_eq!(grammar.how_many(Some(2)), None);
649        assert_eq!(grammar.how_many(None), None);
650    }
651
652    #[test]
653    fn how_many_choice() {
654        let grammar: Grammar = r#"expr : "1"? ;"#.parse().unwrap();
655        assert_eq!(grammar.how_many(Some(2)), Some(2));
656        assert_how_many_matches_generations::<u64>(&grammar, 2);
657
658        let grammar: Grammar = r#"expr : ("1" | "2" )? ;"#.parse().unwrap();
659        assert_eq!(grammar.how_many(Some(2)), Some(3));
660        assert_how_many_matches_generations::<u64>(&grammar, 2);
661
662        // "1", "2", "", or "" (outer)
663        let grammar: Grammar = r#"expr : ("1" | "2"? )? ;"#.parse().unwrap();
664        assert_eq!(grammar.how_many(Some(2)), Some(4));
665        assert_how_many_matches_generations::<u64>(&grammar, 2);
666        assert_eq!(grammar.how_many(None), Some(4));
667    }
668
669    #[test]
670    fn how_many_simpler_math() {
671        let grammar: Grammar = r#"
672            expr   : u16 | expr symbol expr ;
673            symbol : "+" | "-" | "*" | "/" ;
674        "#
675        .parse()
676        .unwrap();
677
678        assert_eq!(grammar.how_many(Some(1)), Some(1));
679        assert_how_many_matches_generations::<u64>(&grammar, 1);
680
681        // a singular number
682        // two numbers with 4 possible operators
683        // 1 + 4
684        assert_eq!(grammar.how_many(Some(2)), Some(5));
685        assert_how_many_matches_generations::<u64>(&grammar, 2);
686
687        // combinations are:
688        // 1 singular number
689        // + 5 possible expressions (from above) on the left
690        // * 4 possible symbols
691        // * 5 possible expressions on the right
692        assert_eq!(grammar.how_many(Some(3)), Some(101));
693        assert_how_many_matches_generations::<u64>(&grammar, 3);
694        assert_eq!(grammar.how_many(None), None);
695    }
696
697    #[test]
698    fn how_many_with_prefined() {
699        let grammar: Grammar = r#"
700            one : "1" | "2" | (u16 | String)? | u16? | (u16 | String){6} ;
701        "#
702        .parse()
703        .unwrap();
704        assert_how_many_matches_generations::<u64>(&grammar, 2);
705    }
706
707    #[test]
708    fn how_many_simple_math() {
709        let grammar: Grammar = r#"
710            expr   : num | expr symbol expr ;
711            symbol : "+" | "-" | "*" | "/" ;
712            num    : nested | "1" | "2" ;
713            nested : u16;
714        "#
715        .parse()
716        .unwrap();
717
718        assert_eq!(grammar.how_many(Some(1)), Some(0));
719        assert_how_many_matches_generations::<u64>(&grammar, 1);
720        // only "1" and "2" are reachable, `nested` is a reference
721        // and needs one more depth!
722        assert_eq!(grammar.how_many(Some(2)), Some(2));
723        assert_how_many_matches_generations::<u64>(&grammar, 2);
724
725        // 3 + 2 * 4 * 2
726        assert_eq!(grammar.how_many(Some(3)), Some(19));
727        assert_how_many_matches_generations::<u64>(&grammar, 3);
728
729        // This grammar is infinitely recursive
730        assert_eq!(grammar.how_many(None), None);
731    }
732
733    #[test]
734    fn how_many_nesting() {
735        let grammar: Grammar = r#"
736            one    : "1" | two ;
737            two    : "2" | three ;
738            three  : "3" ;
739        "#
740        .parse()
741        .unwrap();
742        assert_eq!(grammar.how_many(Some(10)), Some(3));
743        for depth in 0..=3 {
744            assert_how_many_matches_generations::<u64>(&grammar, depth);
745            assert_how_many_matches_generations::<String>(&grammar, depth);
746        }
747    }
748
749    fn success_count(grammar: &Grammar, depth: usize, total: usize) -> usize {
750        let mut buf = [0u8; 1024];
751        let mut rng = StdRng::seed_from_u64(42);
752
753        let mut valid = 0;
754        for _ in 0..total {
755            rng.fill_bytes(&mut buf);
756            let mut u = Unstructured::new(&buf);
757            valid += grammar.expression::<u64>(&mut u, Some(depth)).is_ok() as usize;
758        }
759        valid
760    }
761
762    #[test]
763    fn avoid_long_expr_opt_ref() {
764        let grammar: Grammar = r#"
765            one : two?;
766            two : "2" ;
767        "#
768        .parse()
769        .unwrap();
770
771        for depth in 1..=4 {
772            let valid = success_count(&grammar, depth, 100);
773            assert_eq!(valid, 100);
774            assert_how_many_matches_generations::<u64>(&grammar, depth);
775            assert_how_many_matches_generations::<String>(&grammar, depth);
776        }
777    }
778
779    #[test]
780    fn avoid_long_expr_opt_hardcoded() {
781        let grammar: Grammar = r#"
782            one : "2"?;
783        "#
784        .parse()
785        .unwrap();
786
787        for depth in 1..=4 {
788            let valid = success_count(&grammar, depth, 100);
789            assert_eq!(valid, 100);
790            assert_how_many_matches_generations::<u64>(&grammar, depth);
791            assert_how_many_matches_generations::<String>(&grammar, depth);
792        }
793    }
794
795    #[test]
796    fn avoid_long_expr_opt_hardcoded_paren() {
797        let grammar: Grammar = r#"
798            one : ("2")?;
799        "#
800        .parse()
801        .unwrap();
802
803        for depth in 1..=4 {
804            let valid = success_count(&grammar, depth, 100);
805            assert_eq!(valid, 100);
806            assert_how_many_matches_generations::<u64>(&grammar, depth);
807            assert_how_many_matches_generations::<String>(&grammar, depth);
808        }
809    }
810
811    #[test]
812    fn avoid_long_expr_or() {
813        let grammar: Grammar = r#"
814            one : "1" | two ;
815            two : "2" | three ;
816            three : "3" ;
817        "#
818        .parse()
819        .unwrap();
820
821        for depth in 1..=4 {
822            let valid = success_count(&grammar, depth, 100);
823            assert_eq!(valid, 100);
824            assert_how_many_matches_generations::<u64>(&grammar, depth);
825            assert_how_many_matches_generations::<String>(&grammar, depth);
826        }
827    }
828
829    #[test]
830    fn avoid_long_expr_rep_0_or_more() {
831        let grammar: Grammar = r#"
832            one : "1" | two{4} ;
833            two : "2";
834        "#
835        .parse()
836        .unwrap();
837
838        assert_eq!(grammar.how_many(Some(1)), Some(1));
839
840        for depth in 1..=4 {
841            let valid = success_count(&grammar, depth, 100);
842            assert_eq!(valid, 100);
843            assert_how_many_matches_generations::<u64>(&grammar, depth);
844            assert_how_many_matches_generations::<String>(&grammar, depth);
845        }
846    }
847
848    #[test]
849    fn avoid_long_expr_rep_1_or_more() {
850        let grammar: Grammar = r#"
851            one : "1" | two{1,3} ;
852            two : "2";
853        "#
854        .parse()
855        .unwrap();
856
857        for depth in 1..=4 {
858            let valid = success_count(&grammar, depth, 100);
859            assert_eq!(valid, 100);
860            assert_how_many_matches_generations::<u64>(&grammar, depth);
861            assert_how_many_matches_generations::<String>(&grammar, depth);
862        }
863    }
864
865    #[test]
866    fn avoid_impossible_deep_ref() {
867        let grammar: Grammar = r#"
868            one : two ;
869            two : three ;
870            three : "3";
871        "#
872        .parse()
873        .unwrap();
874
875        for depth in 0..=2 {
876            let valid = success_count(&grammar, depth, 100);
877            assert_eq!(valid, 0);
878            assert_how_many_matches_generations::<u64>(&grammar, depth);
879            assert_how_many_matches_generations::<String>(&grammar, depth);
880        }
881        let valid = success_count(&grammar, 3, 100);
882        assert_eq!(valid, 100);
883        assert_how_many_matches_generations::<u64>(&grammar, 3);
884        assert_how_many_matches_generations::<String>(&grammar, 3);
885    }
886
887    #[test]
888    fn avoid_example() {
889        let grammar: Grammar = r#"
890            one : "1" | two three ;
891            two : "2" "2"? ;
892            three : three_inner ;
893            three_inner : "3"   ;
894        "#
895        .parse()
896        .unwrap();
897
898        for depth in 1..=8 {
899            let valid = success_count(&grammar, depth, 100);
900            assert_eq!(valid, 100);
901            assert_how_many_matches_generations::<u64>(&grammar, depth);
902        }
903    }
904
905    #[test]
906    fn avoid_mixed_branches() {
907        let grammar: Grammar = r#"
908            expr : "qwerty"{2,4} | "4" | (two)? ;
909            two : "5"{3} | three | three four? ;
910            three : two | three ;
911            four  : "4" ;
912        "#
913        .parse()
914        .unwrap();
915
916        for depth in 1..=6 {
917            assert_how_many_matches_generations::<u64>(&grammar, depth);
918        }
919
920        for depth in 1..=30 {
921            let valid = success_count(&grammar, depth, 10);
922            assert_eq!(valid, 10);
923        }
924    }
925}