sbnf/compiler/codegen/
lookahead.rs

1/*
2Given an SBNF expression we build a list of all valid terminals that a parser
3needs to check against, effectively the lookahead of LL1 parsers. We also track
4the stack of non-terminals used to reach each terminal, as well as the
5expressions that may follow each terminal.
6
7Say we have the expression `b` for the following grammar:
8a = ('1' | '2') 'c' ;
9b = a* ';'? ;
10
11This produces the following stacks:
12'1' remaining: 'c'
13| loop 'a'
14| b remaining: ';'?
15
16'2' remaining: 'c'
17| loop 'a'
18| b remaining: ';'?
19
20';'
21
22The expression may also be empty.
23*/
24
25use super::super::common::{Compiler, Symbol};
26use super::super::interpreter::{
27    Expression, Interpreted, Key, TerminalOptions,
28};
29use crate::sbnf::TextLocation;
30use hashbrown::HashMap;
31
32#[derive(Debug, Clone, PartialEq, Eq, Hash)]
33pub enum StackEntryData<'a> {
34    Variable { key: Key },
35    Repetition { expression: &'a Expression<'a> },
36}
37
38#[derive(Debug, Clone, PartialEq, Eq, Hash)]
39pub struct StackEntry<'a> {
40    pub data: StackEntryData<'a>,
41    pub remaining: Vec<&'a Expression<'a>>,
42}
43
44impl<'a> StackEntry<'a> {
45    pub fn with_compiler(
46        &'a self,
47        compiler: &'a Compiler,
48    ) -> StackEntryWithCompiler<'a> {
49        StackEntryWithCompiler { entry: self, compiler }
50    }
51}
52
53pub struct StackEntryWithCompiler<'a> {
54    entry: &'a StackEntry<'a>,
55    compiler: &'a Compiler,
56}
57
58impl std::fmt::Debug for StackEntryWithCompiler<'_> {
59    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
60        match self.entry.data {
61            StackEntryData::Variable { key } => {
62                write!(f, "var {}", key.with_compiler(self.compiler))?;
63            }
64            StackEntryData::Repetition { expression } => {
65                write!(
66                    f,
67                    "repetition {:?}",
68                    expression.with_compiler(self.compiler)
69                )?;
70            }
71        }
72        if !self.entry.remaining.is_empty() {
73            write!(f, " [")?;
74            for expr in &self.entry.remaining {
75                write!(f, "{:?}, ", expr.with_compiler(self.compiler))?;
76            }
77            write!(f, "]")?;
78        }
79        Ok(())
80    }
81}
82
83#[derive(Debug, Clone, PartialEq, Eq)]
84pub struct Terminal<'a> {
85    pub regex: Symbol,
86    // Only None for sentinel terminals used to track left recursion
87    pub options: Option<&'a TerminalOptions<'a>>,
88    pub remaining: Vec<&'a Expression<'a>>,
89    pub stack: Vec<StackEntry<'a>>,
90}
91
92impl<'a> Terminal<'a> {
93    fn new(regex: Symbol, options: Option<&'a TerminalOptions>) -> Self {
94        Terminal { regex, options, remaining: vec![], stack: vec![] }
95    }
96
97    fn get_last_remaining(&mut self) -> &mut Vec<&'a Expression<'a>> {
98        self.get_mut_remaining(self.stack.len())
99    }
100
101    fn get_mut_remaining(
102        &mut self,
103        index: usize,
104    ) -> &mut Vec<&'a Expression<'a>> {
105        if index == 0 {
106            &mut self.remaining
107        } else {
108            &mut self.stack[index - 1].remaining
109        }
110    }
111
112    pub fn get_remaining(&self, index: usize) -> &Vec<&'a Expression<'a>> {
113        if index == 0 {
114            &self.remaining
115        } else {
116            &self.stack[index - 1].remaining
117        }
118    }
119
120    pub fn local_key(&self, topmost: Key) -> Key {
121        for entry in &self.stack {
122            if let StackEntryData::Variable { key } = &entry.data {
123                return *key;
124            }
125        }
126
127        topmost
128    }
129
130    pub fn iter<'b>(&'b self) -> TerminalStackIterator<'a, 'b> {
131        TerminalStackIterator {
132            terminal: self,
133            index: 0,
134            size: self.stack.len() + 1,
135        }
136    }
137
138    pub fn has_any_remaining(&self) -> bool {
139        if !self.remaining.is_empty() {
140            return true;
141        }
142
143        for entry in self.stack.iter().rev() {
144            if !entry.remaining.is_empty() {
145                return true;
146            }
147        }
148
149        false
150    }
151
152    pub fn with_compiler(
153        &'a self,
154        compiler: &'a Compiler,
155    ) -> TerminalWithCompiler<'a> {
156        TerminalWithCompiler { terminal: self, compiler }
157    }
158}
159
160impl std::hash::Hash for Terminal<'_> {
161    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
162        self.regex.hash(state);
163        self.remaining.hash(state);
164        self.stack.hash(state);
165    }
166}
167
168#[derive(Debug, Clone)]
169pub struct TerminalStackIterator<'a, 'b> {
170    terminal: &'b Terminal<'a>,
171    index: usize,
172    size: usize,
173}
174
175#[derive(Debug, Clone)]
176pub struct TerminalStackIteratorItem<'a, 'b> {
177    pub data: Option<&'b StackEntryData<'a>>,
178    pub remaining: &'b Vec<&'a Expression<'a>>,
179}
180
181impl<'a> TerminalStackIteratorItem<'a, '_> {
182    fn clone_stack_entry(&self) -> StackEntry<'a> {
183        StackEntry {
184            data: self.data.unwrap().clone(),
185            remaining: self.remaining.clone(),
186        }
187    }
188}
189
190impl<'a> PartialEq<StackEntry<'a>> for TerminalStackIteratorItem<'a, '_> {
191    fn eq(&self, other: &StackEntry<'a>) -> bool {
192        self.data.is_some_and(|d| *d == other.data)
193            && *self.remaining == other.remaining
194    }
195}
196
197impl<'a, 'b> TerminalStackIterator<'a, 'b> {
198    fn get_item(&self, index: usize) -> TerminalStackIteratorItem<'a, 'b> {
199        if index == 0 {
200            TerminalStackIteratorItem {
201                data: None,
202                remaining: &self.terminal.remaining,
203            }
204        } else {
205            let stack_entry = &self.terminal.stack[index - 1];
206
207            TerminalStackIteratorItem {
208                data: Some(&stack_entry.data),
209                remaining: &stack_entry.remaining,
210            }
211        }
212    }
213
214    fn _is_empty(&self) -> bool {
215        self.index == self.size
216    }
217}
218
219impl<'a, 'b> Iterator for TerminalStackIterator<'a, 'b> {
220    type Item = TerminalStackIteratorItem<'a, 'b>;
221
222    fn next(&mut self) -> Option<Self::Item> {
223        if self._is_empty() {
224            return None;
225        }
226
227        let result = self.get_item(self.index);
228        self.index += 1;
229        Some(result)
230    }
231
232    fn size_hint(&self) -> (usize, Option<usize>) {
233        let l = self.len();
234        (l, Some(l))
235    }
236}
237
238impl DoubleEndedIterator for TerminalStackIterator<'_, '_> {
239    fn next_back(&mut self) -> Option<Self::Item> {
240        if self._is_empty() {
241            return None;
242        }
243
244        self.size -= 1;
245        Some(self.get_item(self.size))
246    }
247}
248
249impl ExactSizeIterator for TerminalStackIterator<'_, '_> {
250    fn len(&self) -> usize {
251        self.size - self.index
252    }
253}
254
255pub struct TerminalWithCompiler<'a> {
256    terminal: &'a Terminal<'a>,
257    compiler: &'a Compiler,
258}
259
260impl std::fmt::Debug for TerminalWithCompiler<'_> {
261    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
262        writeln!(
263            f,
264            "match {} {:?}",
265            self.compiler.resolve_symbol(self.terminal.regex),
266            self.terminal.options
267        )?;
268        if !self.terminal.remaining.is_empty() {
269            write!(f, "remaining [")?;
270            for expr in &self.terminal.remaining {
271                write!(f, "{:?}, ", expr.with_compiler(self.compiler))?;
272            }
273            writeln!(f, "]")?;
274        }
275        if !self.terminal.stack.is_empty() {
276            writeln!(f, "stack")?;
277            for entry in &self.terminal.stack {
278                writeln!(f, "* {:?}", entry.with_compiler(self.compiler))?;
279            }
280        }
281        Ok(())
282    }
283}
284
285// Determines how a context should end, ie. what the last match in a context
286// should be.
287#[derive(Debug, Clone, PartialEq, Eq, Hash)]
288pub enum End<'a> {
289    // Reaching the end of the context is illegal
290    Illegal,
291    // Ignore things that don't match, ie. don't do anything at the end of the context
292    None,
293    // If the end of the context is reached, push another context
294    Push(Box<Lookahead<'a>>),
295}
296
297#[derive(Debug, Clone, PartialEq, Eq, Hash)]
298pub struct Lookahead<'a> {
299    pub terminals: Vec<Terminal<'a>>,
300    pub end: End<'a>,
301    pub empty: bool,
302}
303
304impl<'a> Lookahead<'a> {
305    fn append(&mut self, mut other: Self) {
306        self.terminals.append(&mut other.terminals);
307
308        self.empty = self.empty || other.empty;
309
310        match self.end {
311            End::Illegal => {
312                self.end = other.end;
313            }
314            End::None => {
315                self.end = match other.end {
316                    End::Illegal | End::None => End::None,
317                    End::Push(ref mut _push) => {
318                        // push.append_update_end(End::None);
319                        other.end
320                    }
321                };
322            }
323            End::Push(ref mut self_push) => {
324                match other.end {
325                    End::Illegal | End::None => {
326                        // self_push.append_update_end(End::None);
327                    }
328                    End::Push(other_push) => {
329                        self_push.append(*other_push);
330                    }
331                }
332            }
333        }
334    }
335
336    fn concatenate(&mut self, mut next: Self) {
337        // Should only be concatenating to empty lookaheads
338        assert!(self.empty);
339
340        self.end = match self.end {
341            End::Illegal => match next.end {
342                End::Illegal => End::Illegal,
343                End::None => End::Push(Box::new(Lookahead {
344                    terminals: next.terminals.clone(),
345                    end: End::None,
346                    empty: next.empty,
347                })),
348                End::Push(p) => End::Push(p),
349            },
350            End::None => match next.end {
351                End::Illegal => End::Push(Box::new(Lookahead {
352                    terminals: self.terminals.clone(),
353                    end: End::None,
354                    empty: next.empty,
355                })),
356                End::None => End::None,
357                _ => todo!(),
358            },
359            _ => todo!(),
360        };
361
362        self.terminals.append(&mut next.terminals);
363        self.empty = next.empty;
364    }
365
366    pub fn with_compiler(
367        &'a self,
368        compiler: &'a Compiler,
369    ) -> LookaheadWithCompiler<'a> {
370        LookaheadWithCompiler { lookahead: self, compiler }
371    }
372}
373
374pub struct LookaheadWithCompiler<'a> {
375    lookahead: &'a Lookahead<'a>,
376    compiler: &'a Compiler,
377}
378
379impl std::fmt::Debug for LookaheadWithCompiler<'_> {
380    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
381        writeln!(
382            f,
383            "LOOKAHEAD empty:{:?} end:{:?}",
384            self.lookahead.empty, self.lookahead.end
385        )?;
386        for terminal in &self.lookahead.terminals {
387            writeln!(f, "{:?}", terminal.with_compiler(self.compiler))?;
388        }
389        write!(f, "LOOKAHEAD END")
390    }
391}
392
393pub struct LookaheadState<'a> {
394    visited_variables: HashMap<Key, bool>,
395    pub compiler: &'a Compiler,
396}
397
398impl<'a> LookaheadState<'a> {
399    pub fn new(compiler: &'a Compiler) -> LookaheadState<'a> {
400        LookaheadState { visited_variables: HashMap::new(), compiler }
401    }
402
403    pub fn push_variable(&mut self, key: Key) -> Option<Lookahead<'a>> {
404        // If we're already in the stack of variables then we've got left recursion
405        if let Some(left_recursion) = self.visited_variables.get_mut(&key) {
406            *left_recursion = true;
407
408            // Create a sentinel terminal with no terminal options
409            Some(Lookahead {
410                terminals: vec![Terminal::new(key.as_symbol(), None)],
411                end: End::Illegal,
412                empty: false,
413            })
414        } else {
415            self.visited_variables.insert(key, false);
416            None
417        }
418    }
419
420    pub fn pop_variable(&mut self, key: Key, lookahead: &mut Lookahead<'a>) {
421        // Check if we have left recursion
422        if self.visited_variables.remove(&key).unwrap() {
423            // Extract terminals that follow a left recursion
424            let left_recursion_terminals = {
425                let mut result = vec![];
426                let mut i = 0;
427                while i < lookahead.terminals.len() {
428                    if lookahead.terminals[i].options.is_none() {
429                        result.push(lookahead.terminals.remove(i));
430                    } else {
431                        i += 1;
432                    }
433                }
434                result
435            };
436
437            // Build expressions for the terminals following a left recursion as
438            // a repetition of an alternation of concatenations.
439            // TODO: This doesn't seem right, but waiting on examples to work on
440            // this further.
441            let expressions = left_recursion_terminals
442                .into_iter()
443                .filter_map(|rt| {
444                    if rt.remaining.len() > 1 {
445                        Some(Expression::Concatenation {
446                            expressions: self
447                                .compiler
448                                .allocator
449                                .alloc_slice_fill_iter(
450                                    rt.remaining.iter().map(|e| (*e).clone()),
451                                ),
452                            location: TextLocation::invalid(),
453                        })
454                    } else if rt.remaining.len() == 1 {
455                        Some((*rt.remaining[0]).clone())
456                    } else {
457                        None
458                    }
459                })
460                .collect::<Vec<_>>();
461            let expressions =
462                self.compiler.allocator.alloc_slice_fill_iter(expressions);
463
464            if !expressions.is_empty() {
465                let expression = if expressions.len() > 1 {
466                    self.compiler.allocator.alloc(Expression::Alternation {
467                        expressions,
468                        location: TextLocation::invalid(),
469                    })
470                } else {
471                    expressions.iter_mut().next().unwrap()
472                };
473                let rep =
474                    self.compiler.allocator.alloc(Expression::Repetition {
475                        expression,
476                        location: TextLocation::invalid(),
477                    });
478
479                for term in &mut lookahead.terminals {
480                    term.remaining.insert(0, rep);
481                }
482            } else {
483                lookahead.empty = true;
484            }
485        }
486    }
487}
488
489// Transform and collect matches that the context for the expression needs to match
490pub fn lookahead<'a>(
491    interpreted: &Interpreted<'a>,
492    expression: &'a Expression,
493    state: &mut LookaheadState<'a>,
494) -> Lookahead<'a> {
495    match expression {
496        Expression::Variable { key, .. } => {
497            if let Some(lookahead) = state.push_variable(*key) {
498                return lookahead;
499            }
500
501            let rule = interpreted.rules.get(key).unwrap();
502
503            let mut la = lookahead(interpreted, rule.expression, state);
504
505            state.pop_variable(*key, &mut la);
506
507            // Add the variable to the stacks
508            for term in &mut la.terminals {
509                term.stack.push(StackEntry {
510                    data: StackEntryData::Variable { key: *key },
511                    remaining: vec![],
512                });
513            }
514
515            la
516        }
517        Expression::Terminal { regex, options, .. } => Lookahead {
518            terminals: vec![Terminal::new(*regex, Some(options))],
519            end: End::Illegal,
520            empty: false,
521        },
522        Expression::Passive { expression, .. } => {
523            let mut la = lookahead(interpreted, expression, state);
524            la.end = End::None;
525            la
526        }
527        Expression::Repetition { expression: child, .. } => {
528            let mut la = lookahead(interpreted, child, state);
529
530            // Add the repetition to the front of each match stack
531            for term in &mut la.terminals {
532                term.stack.push(StackEntry {
533                    data: StackEntryData::Repetition { expression },
534                    remaining: vec![],
535                });
536            }
537
538            // Treat repetitions optional
539            la.empty = true;
540
541            la
542        }
543        Expression::Optional { expression: child, .. } => {
544            let la = lookahead(interpreted, child, state);
545
546            match la.end {
547                End::Illegal => Lookahead {
548                    terminals: la.terminals,
549                    end: la.end,
550                    empty: true,
551                },
552                End::None | End::Push(_) => Lookahead {
553                    terminals: vec![],
554                    end: End::Push(Box::new(la)),
555                    empty: true,
556                },
557            }
558        }
559        Expression::Alternation { expressions, .. } => {
560            let mut la = Lookahead {
561                terminals: vec![],
562                end: End::Illegal,
563                empty: false,
564            };
565
566            for expression in *expressions {
567                la.append(lookahead(interpreted, expression, state));
568            }
569
570            la
571        }
572        Expression::Concatenation { expressions, .. } => {
573            lookahead_concatenation(interpreted, expressions.iter(), state)
574        }
575    }
576}
577
578// A concatenation of contexts is the first context that can't be empty, with
579// those before being alternations
580pub fn lookahead_concatenation<'a, I>(
581    interpreted: &Interpreted<'a>,
582    mut expressions: I,
583    state: &mut LookaheadState<'a>,
584) -> Lookahead<'a>
585where
586    I: std::iter::Iterator<Item = &'a Expression<'a>>,
587    I: std::clone::Clone,
588{
589    let mut f = |expr, remaining: I| {
590        let mut l: Lookahead<'a> = lookahead(interpreted, expr, state);
591
592        for terminal in &mut l.terminals {
593            terminal.get_last_remaining().extend(remaining.clone());
594        }
595        l
596    };
597
598    let mut la = f(expressions.next().unwrap(), expressions.clone());
599
600    if !la.empty {
601        return la;
602    }
603
604    while let Some(expression) = expressions.next() {
605        let l = f(expression, expressions.clone());
606
607        la.concatenate(l);
608
609        if !la.empty {
610            break;
611        }
612    }
613
614    la
615}
616
617// Collect the next context following the context stack
618pub fn advance_terminal<'a>(
619    interpreted: &Interpreted<'a>,
620    terminal: &Terminal<'a>,
621    compiler: &'a Compiler,
622) -> Option<Lookahead<'a>> {
623    let mut iter = terminal.iter();
624
625    let mut lookahead = None;
626
627    while let Some(entry) = iter.next() {
628        if entry.remaining.is_empty() {
629            continue;
630        }
631
632        let mut las = LookaheadState::new(compiler);
633
634        let mut la = match &entry.data {
635            Some(StackEntryData::Repetition { expression }) => {
636                lookahead_concatenation(
637                    interpreted,
638                    [expression]
639                        .iter()
640                        .cloned()
641                        .chain(entry.remaining.iter())
642                        .cloned(),
643                    &mut las,
644                )
645            }
646            _ => lookahead_concatenation(
647                interpreted,
648                entry.remaining.iter().cloned(),
649                &mut las,
650            ),
651        };
652
653        for t in &mut la.terminals {
654            // Check for recursion and convert it to a repetition
655            // TODO: The cloned here seems unnecessary
656            if !t.stack.is_empty()
657                && iter.clone().take(t.stack.len()).eq(t.stack.iter().cloned())
658            {
659                t.stack.extend(
660                    iter.clone()
661                        .take(t.stack.len())
662                        .map(|e| e.clone_stack_entry()),
663                );
664                continue;
665            }
666
667            t.stack.extend(iter.clone().map(|e| e.clone_stack_entry()));
668        }
669
670        match &mut lookahead {
671            None => {
672                lookahead = Some(la);
673            }
674            Some(lookahead) => {
675                lookahead.concatenate(la);
676            }
677        }
678
679        if !lookahead.as_ref().unwrap().empty {
680            break;
681        }
682    }
683
684    lookahead
685}
686
687#[cfg(test)]
688mod tests {
689    extern crate matches;
690    use matches::assert_matches;
691
692    use super::*;
693    use crate::compiler::interpreter::tests::*;
694    use crate::compiler::{collector, interpreter, CompileOptions, Compiler};
695    use crate::sbnf;
696
697    #[derive(Default)]
698    struct Harness {
699        compiler: Compiler,
700    }
701
702    impl Harness {
703        fn symbol(&self, name: &str) -> Symbol {
704            self.compiler.get_symbol(name)
705        }
706
707        fn lookahead<F>(&mut self, source: &str, rule_name: &str, fun: F)
708        where
709            F: Fn(Lookahead, &Compiler),
710        {
711            let grammar =
712                sbnf::parse(source, &self.compiler.allocator).unwrap();
713
714            let options = CompileOptions {
715                name_hint: Some("test"),
716                arguments: vec![],
717                debug_contexts: false,
718                entry_points: vec!["m"],
719            };
720
721            let collection =
722                collector::collect(&self.compiler, &options, &grammar);
723            assert!(collection.warnings.is_empty());
724
725            let collection = collection.result.unwrap();
726
727            let interpreter_result =
728                interpreter::interpret(&self.compiler, &options, collection);
729            assert!(interpreter_result.warnings.is_empty());
730
731            let interpreted = interpreter_result.result.as_ref().unwrap();
732
733            let key = interpreter::Key::basic(self.symbol(rule_name));
734            let rule = &interpreted.rules[&key];
735
736            let mut lookahead_state = LookaheadState::new(&self.compiler);
737            assert!(lookahead_state.push_variable(key).is_none());
738
739            let mut la =
740                lookahead(interpreted, rule.expression, &mut lookahead_state);
741
742            lookahead_state.pop_variable(key, &mut la);
743
744            println!("{:?}", la.with_compiler(&self.compiler));
745            fun(la, &self.compiler);
746        }
747    }
748
749    fn sed_rep<'a>(expression: &'a Expression) -> StackEntryData<'a> {
750        StackEntryData::Repetition { expression }
751    }
752
753    fn sed_var<'a>(key: Key) -> StackEntryData<'a> {
754        StackEntryData::Variable { key }
755    }
756
757    #[test]
758    fn collect_passive() {
759        let mut harness = Harness::default();
760        let sym_a = harness.symbol("a");
761        let sym_b = harness.symbol("b");
762        let sym_c = harness.symbol("c");
763
764        harness.lookahead("m : ~'a';", "m", |lookahead, _c| {
765            assert_eq!(lookahead.end, End::None);
766            assert!(!lookahead.empty);
767            assert_eq!(lookahead.terminals.len(), 1);
768            assert_eq!(lookahead.terminals[0].regex, sym_a);
769            assert_eq!(lookahead.terminals[0].remaining.len(), 0);
770            assert_eq!(lookahead.terminals[0].stack.len(), 0);
771        });
772
773        harness.lookahead("m : ~'a' 'b';", "m", |lookahead, _c| {
774            assert_eq!(lookahead.end, End::None);
775            assert!(!lookahead.empty);
776            assert_eq!(lookahead.terminals.len(), 1);
777            let term0 = &lookahead.terminals[0];
778            assert_eq!(term0.regex, sym_a);
779            assert_eq!(term0.remaining.len(), 1);
780            assert_eq!(term0.remaining[0], &expr_trm_noopt(sym_b));
781            assert_eq!(term0.stack.len(), 0);
782        });
783
784        harness.lookahead("m : ~'a'* ~'b';", "m", |lookahead, c| {
785            assert_eq!(lookahead.end, End::None);
786            assert!(!lookahead.empty);
787            assert_eq!(lookahead.terminals.len(), 2);
788            let term0 = &lookahead.terminals[0];
789            assert_eq!(term0.regex, sym_a);
790            assert_eq!(term0.remaining.len(), 0);
791            assert_eq!(term0.stack.len(), 1);
792            assert_eq!(
793                term0.stack[0].data,
794                sed_rep(&expr_rep(c, expr_trm_noopt(sym_a)))
795            );
796            assert_eq!(term0.stack[0].remaining.len(), 1);
797            assert_eq!(
798                term0.stack[0].remaining[0],
799                &expr_pas(c, expr_trm_noopt(sym_b))
800            );
801            let term1 = &lookahead.terminals[1];
802            assert_eq!(term1.regex, sym_b);
803            assert_eq!(term1.remaining.len(), 0);
804            assert_eq!(term1.stack.len(), 0);
805        });
806
807        harness.lookahead("m : (~'a')* ~'b';", "m", |lookahead, c| {
808            assert_eq!(lookahead.end, End::None);
809            assert!(!lookahead.empty);
810            assert_eq!(lookahead.terminals.len(), 2);
811            let term0 = &lookahead.terminals[0];
812            assert_eq!(term0.regex, sym_a);
813            assert_eq!(term0.remaining.len(), 0);
814            assert_eq!(term0.stack.len(), 1);
815            assert_eq!(
816                term0.stack[0].data,
817                sed_rep(&expr_rep(c, expr_pas(c, expr_trm_noopt(sym_a))))
818            );
819            assert_eq!(term0.stack[0].remaining.len(), 1);
820            assert_eq!(
821                term0.stack[0].remaining[0],
822                &expr_pas(c, expr_trm_noopt(sym_b))
823            );
824            let term1 = &lookahead.terminals[1];
825            assert_eq!(term1.regex, sym_b);
826            assert_eq!(term1.remaining.len(), 0);
827            assert_eq!(term1.stack.len(), 0);
828        });
829
830        harness.lookahead("m : ~('a' | 'b') 'c';", "m", |lookahead, _c| {
831            assert_matches!(lookahead.end, End::None);
832            assert!(!lookahead.empty);
833            assert_eq!(lookahead.terminals.len(), 2);
834            let term0 = &lookahead.terminals[0];
835            assert_eq!(term0.regex, sym_a);
836            assert_eq!(term0.remaining.len(), 1);
837            assert_eq!(term0.remaining[0], &expr_trm_noopt(sym_c));
838            assert_eq!(term0.stack.len(), 0);
839            let term1 = &lookahead.terminals[1];
840            assert_eq!(term1.regex, sym_b);
841            assert_eq!(term1.remaining.len(), 1);
842            assert_eq!(term1.remaining[0], &expr_trm_noopt(sym_c));
843            assert_eq!(term1.stack.len(), 0);
844        });
845
846        harness.lookahead("m : ~'a'?;", "m", |lookahead, _c| {
847            assert_matches!(lookahead.end, End::None);
848            assert!(lookahead.empty);
849            assert_eq!(lookahead.terminals.len(), 1);
850            let term0 = &lookahead.terminals[0];
851            assert_eq!(term0.regex, sym_a);
852            assert_eq!(term0.remaining.len(), 0);
853            assert_eq!(term0.stack.len(), 0);
854        });
855
856        harness.lookahead("m : (~'a')?;", "m", |lookahead, _c| {
857            match lookahead.end {
858                End::Push(lookahead) => {
859                    assert_matches!(lookahead.end, End::None);
860                    assert!(!lookahead.empty);
861                    assert_eq!(lookahead.terminals.len(), 1);
862                    let term0 = &lookahead.terminals[0];
863                    assert_eq!(term0.regex, sym_a);
864                    assert_eq!(term0.remaining.len(), 0);
865                    assert_eq!(term0.stack.len(), 0);
866                }
867                _ => panic!(),
868            }
869            assert!(lookahead.empty);
870            // TODO: This is wrong!
871            assert!(lookahead.terminals.is_empty());
872        });
873
874        harness.lookahead("m : (~'a')* 'b';", "m", |lookahead, c| {
875            match lookahead.end {
876                End::Push(lookahead) => {
877                    assert_matches!(lookahead.end, End::None);
878                    assert!(!lookahead.empty);
879                    assert_eq!(lookahead.terminals.len(), 1);
880                    let term0 = &lookahead.terminals[0];
881                    assert_eq!(term0.regex, sym_a);
882                    assert_eq!(term0.remaining.len(), 0);
883                    assert_eq!(term0.stack.len(), 1);
884                    assert_eq!(
885                        term0.stack[0].data,
886                        sed_rep(&expr_rep(
887                            c,
888                            expr_pas(c, expr_trm_noopt(sym_a))
889                        ))
890                    );
891                    assert_eq!(term0.stack[0].remaining.len(), 1);
892                    assert_eq!(
893                        term0.stack[0].remaining[0],
894                        &expr_trm_noopt(sym_b)
895                    );
896                }
897                _ => panic!(),
898            }
899            assert!(!lookahead.empty);
900            assert_eq!(lookahead.terminals.len(), 2);
901            let term0 = &lookahead.terminals[0];
902            assert_eq!(term0.regex, sym_a);
903            assert_eq!(term0.remaining.len(), 0);
904            assert_eq!(term0.stack.len(), 1);
905            assert_eq!(
906                term0.stack[0].data,
907                sed_rep(&expr_rep(c, expr_pas(c, expr_trm_noopt(sym_a))))
908            );
909            assert_eq!(term0.stack[0].remaining.len(), 1);
910            assert_eq!(term0.stack[0].remaining[0], &expr_trm_noopt(sym_b));
911            let term1 = &lookahead.terminals[1];
912            assert_eq!(term1.regex, sym_b);
913            assert_eq!(term1.remaining.len(), 0);
914            assert_eq!(term1.stack.len(), 0);
915        });
916
917        harness.lookahead("m : 'a'? ~'b';", "m", |lookahead, c| {
918            match lookahead.end {
919                End::Push(lookahead) => {
920                    assert_matches!(lookahead.end, End::None);
921                    assert!(!lookahead.empty);
922                    assert_eq!(lookahead.terminals.len(), 1);
923                    let term0 = &lookahead.terminals[0];
924                    assert_eq!(term0.regex, sym_b);
925                    assert_eq!(term0.remaining.len(), 0);
926                    assert_eq!(term0.stack.len(), 0);
927                }
928                _ => panic!(),
929            }
930            assert!(!lookahead.empty);
931            assert_eq!(lookahead.terminals.len(), 2);
932            let term0 = &lookahead.terminals[0];
933            assert_eq!(term0.regex, sym_a);
934            assert_eq!(term0.remaining.len(), 1);
935            assert_eq!(term0.remaining[0], &expr_pas(c, expr_trm_noopt(sym_b)));
936            assert_eq!(term0.stack.len(), 0);
937            let term1 = &lookahead.terminals[1];
938            assert_eq!(term1.regex, sym_b);
939            assert_eq!(term1.remaining.len(), 0);
940            assert_eq!(term1.stack.len(), 0);
941        });
942    }
943
944    #[test]
945    fn collect_repetition() {
946        let mut harness = Harness::default();
947        let sym_a = harness.symbol("a");
948        let sym_b = harness.symbol("b");
949        let sym_c = harness.symbol("c");
950
951        harness.lookahead("m : 'a'*;", "m", |lookahead, c| {
952            assert_matches!(lookahead.end, End::Illegal);
953            assert!(lookahead.empty);
954            assert_eq!(lookahead.terminals.len(), 1);
955            let term0 = &lookahead.terminals[0];
956            assert_eq!(term0.regex, sym_a);
957            assert_eq!(term0.remaining.len(), 0);
958            assert_eq!(term0.stack.len(), 1);
959            assert_eq!(
960                term0.stack[0].data,
961                sed_rep(&expr_rep(c, expr_trm_noopt(sym_a)))
962            );
963            assert_eq!(term0.stack[0].remaining.len(), 0);
964        });
965
966        harness.lookahead("m : ('a'? 'b' | 'c')*;", "m", |lookahead, c| {
967            let rep = expr_rep(
968                c,
969                expr_alt(
970                    c,
971                    &[
972                        expr_cat(
973                            c,
974                            &[
975                                expr_opt(c, expr_trm_noopt(sym_a)),
976                                expr_trm_noopt(sym_b),
977                            ],
978                        ),
979                        expr_trm_noopt(sym_c),
980                    ],
981                ),
982            );
983
984            assert_matches!(lookahead.end, End::Illegal);
985            assert!(lookahead.empty);
986            assert_eq!(lookahead.terminals.len(), 3);
987            let term0 = &lookahead.terminals[0];
988            assert_eq!(term0.regex, sym_a);
989            assert_eq!(term0.remaining.len(), 1);
990            assert_eq!(term0.remaining[0], &expr_trm_noopt(sym_b));
991            assert_eq!(term0.stack.len(), 1);
992            assert_eq!(term0.stack[0].data, sed_rep(&rep));
993            assert_eq!(term0.stack[0].remaining.len(), 0);
994            let term1 = &lookahead.terminals[1];
995            assert_eq!(term1.regex, sym_b);
996            assert_eq!(term1.remaining.len(), 0);
997            assert_eq!(term1.stack.len(), 1);
998            assert_eq!(term1.stack[0].data, sed_rep(&rep));
999            assert_eq!(term1.stack[0].remaining.len(), 0);
1000            let term2 = &lookahead.terminals[2];
1001            assert_eq!(term2.regex, sym_c);
1002            assert_eq!(term2.remaining.len(), 0);
1003            assert_eq!(term2.stack.len(), 1);
1004            assert_eq!(term2.stack[0].data, sed_rep(&rep));
1005            assert_eq!(term2.stack[0].remaining.len(), 0);
1006        });
1007    }
1008
1009    #[test]
1010    fn collect_optional() {
1011        let mut harness = Harness::default();
1012        let sym_a = harness.symbol("a");
1013        let sym_b = harness.symbol("b");
1014        let sym_c = harness.symbol("c");
1015
1016        harness.lookahead("m : 'a'?;", "m", |lookahead, _c| {
1017            assert_matches!(lookahead.end, End::Illegal);
1018            assert!(lookahead.empty);
1019            assert_eq!(lookahead.terminals.len(), 1);
1020            let term0 = &lookahead.terminals[0];
1021            assert_eq!(term0.regex, sym_a);
1022            assert_eq!(term0.remaining.len(), 0);
1023            assert_eq!(term0.stack.len(), 0);
1024        });
1025
1026        harness.lookahead("m : ('a' | 'b'* 'c')?;", "m", |lookahead, c| {
1027            assert_matches!(lookahead.end, End::Illegal);
1028            assert!(lookahead.empty);
1029            assert_eq!(lookahead.terminals.len(), 3);
1030            let term0 = &lookahead.terminals[0];
1031            assert_eq!(term0.regex, sym_a);
1032            assert_eq!(term0.remaining.len(), 0);
1033            assert_eq!(term0.stack.len(), 0);
1034            let term1 = &lookahead.terminals[1];
1035            assert_eq!(term1.regex, sym_b);
1036            assert_eq!(term1.remaining.len(), 0);
1037            assert_eq!(term1.stack.len(), 1);
1038            assert_eq!(
1039                term1.stack[0].data,
1040                sed_rep(&expr_rep(c, expr_trm_noopt(sym_b)))
1041            );
1042            assert_eq!(term1.stack[0].remaining.len(), 1);
1043            assert_eq!(term1.stack[0].remaining[0], &expr_trm_noopt(sym_c));
1044            let term2 = &lookahead.terminals[2];
1045            assert_eq!(term2.regex, sym_c);
1046            assert_eq!(term2.remaining.len(), 0);
1047            assert_eq!(term2.stack.len(), 0);
1048        });
1049    }
1050
1051    #[test]
1052    fn collect_alternation() {
1053        let mut harness = Harness::default();
1054        let sym_a = harness.symbol("a");
1055        let sym_b = harness.symbol("b");
1056        let sym_c = harness.symbol("c");
1057
1058        harness.lookahead("m : 'a' | 'b';", "m", |lookahead, _c| {
1059            assert_matches!(lookahead.end, End::Illegal);
1060            assert!(!lookahead.empty);
1061            assert_eq!(lookahead.terminals.len(), 2);
1062            let term0 = &lookahead.terminals[0];
1063            assert_eq!(term0.regex, sym_a);
1064            assert_eq!(term0.remaining.len(), 0);
1065            assert_eq!(term0.stack.len(), 0);
1066            let term1 = &lookahead.terminals[1];
1067            assert_eq!(term1.regex, sym_b);
1068            assert_eq!(term1.remaining.len(), 0);
1069            assert_eq!(term1.stack.len(), 0);
1070        });
1071
1072        harness.lookahead("m : 'a' | 'b' 'c';", "m", |lookahead, _c| {
1073            assert_matches!(lookahead.end, End::Illegal);
1074            assert!(!lookahead.empty);
1075            assert_eq!(lookahead.terminals.len(), 2);
1076            let term0 = &lookahead.terminals[0];
1077            assert_eq!(term0.regex, sym_a);
1078            assert_eq!(term0.remaining.len(), 0);
1079            assert_eq!(term0.stack.len(), 0);
1080            let term1 = &lookahead.terminals[1];
1081            assert_eq!(term1.regex, sym_b);
1082            assert_eq!(term1.remaining.len(), 1);
1083            assert_eq!(term1.remaining[0], &expr_trm_noopt(sym_c));
1084            assert_eq!(term1.stack.len(), 0);
1085        });
1086
1087        harness.lookahead("m : 'a'? | 'b' | 'c'*;", "m", |lookahead, c| {
1088            assert_matches!(lookahead.end, End::Illegal);
1089            assert!(lookahead.empty);
1090            assert_eq!(lookahead.terminals.len(), 3);
1091            let term0 = &lookahead.terminals[0];
1092            assert_eq!(term0.regex, sym_a);
1093            assert_eq!(term0.remaining.len(), 0);
1094            assert_eq!(term0.stack.len(), 0);
1095            let term1 = &lookahead.terminals[1];
1096            assert_eq!(term1.regex, sym_b);
1097            assert_eq!(term1.remaining.len(), 0);
1098            assert_eq!(term1.stack.len(), 0);
1099            let term2 = &lookahead.terminals[2];
1100            assert_eq!(term2.regex, sym_c);
1101            assert_eq!(term2.remaining.len(), 0);
1102            assert_eq!(term2.stack.len(), 1);
1103            assert_eq!(
1104                term2.stack[0].data,
1105                sed_rep(&expr_rep(c, expr_trm_noopt(sym_c)))
1106            );
1107            assert_eq!(term2.stack[0].remaining.len(), 0);
1108        });
1109    }
1110
1111    #[test]
1112    fn collect_concat() {
1113        let mut harness = Harness::default();
1114        let sym_a = harness.symbol("a");
1115        let sym_b = harness.symbol("b");
1116        let sym_c = harness.symbol("c");
1117        let r_key = Key::basic(harness.symbol("r"));
1118
1119        harness.lookahead("m : 'a' 'b';", "m", |lookahead, _c| {
1120            assert_matches!(lookahead.end, End::Illegal);
1121            assert!(!lookahead.empty);
1122            assert_eq!(lookahead.terminals.len(), 1);
1123            let term0 = &lookahead.terminals[0];
1124            assert_eq!(term0.regex, sym_a);
1125            assert_eq!(term0.remaining.len(), 1);
1126            assert_eq!(term0.remaining[0], &expr_trm_noopt(sym_b));
1127            assert_eq!(term0.stack.len(), 0);
1128        });
1129
1130        harness.lookahead("m : ('a' | 'b') 'c';", "m", |lookahead, _c| {
1131            assert_matches!(lookahead.end, End::Illegal);
1132            assert!(!lookahead.empty);
1133            assert_eq!(lookahead.terminals.len(), 2);
1134            let term0 = &lookahead.terminals[0];
1135            assert_eq!(term0.regex, sym_a);
1136            assert_eq!(term0.remaining.len(), 1);
1137            assert_eq!(term0.remaining[0], &expr_trm_noopt(sym_c));
1138            assert_eq!(term0.stack.len(), 0);
1139            let term1 = &lookahead.terminals[1];
1140            assert_eq!(term1.regex, sym_b);
1141            assert_eq!(term1.remaining.len(), 1);
1142            assert_eq!(term1.remaining[0], &expr_trm_noopt(sym_c));
1143            assert_eq!(term1.stack.len(), 0);
1144        });
1145
1146        harness.lookahead("m : 'a'? 'b'* 'c';", "m", |lookahead, c| {
1147            assert_matches!(lookahead.end, End::Illegal);
1148            assert!(!lookahead.empty);
1149            assert_eq!(lookahead.terminals.len(), 3);
1150            let term0 = &lookahead.terminals[0];
1151            assert_eq!(term0.regex, sym_a);
1152            assert_eq!(term0.remaining.len(), 2);
1153            assert_eq!(term0.remaining[0], &expr_rep(c, expr_trm_noopt(sym_b)));
1154            assert_eq!(term0.remaining[1], &expr_trm_noopt(sym_c));
1155            assert_eq!(term0.stack.len(), 0);
1156            let term1 = &lookahead.terminals[1];
1157            assert_eq!(term1.regex, sym_b);
1158            assert_eq!(term1.remaining.len(), 0);
1159            assert_eq!(term1.stack.len(), 1);
1160            assert_eq!(
1161                term1.stack[0].data,
1162                sed_rep(&expr_rep(c, expr_trm_noopt(sym_b)))
1163            );
1164            assert_eq!(term1.stack[0].remaining.len(), 1);
1165            assert_eq!(term1.stack[0].remaining[0], &expr_trm_noopt(sym_c));
1166            let term2 = &lookahead.terminals[2];
1167            assert_eq!(term2.regex, sym_c);
1168            assert_eq!(term2.remaining.len(), 0);
1169            assert_eq!(term2.stack.len(), 0);
1170        });
1171
1172        harness.lookahead("m : 'a'* 'b'?;", "m", |lookahead, c| {
1173            assert_matches!(lookahead.end, End::Illegal);
1174            assert!(lookahead.empty);
1175            assert_eq!(lookahead.terminals.len(), 2);
1176            let term0 = &lookahead.terminals[0];
1177            assert_eq!(term0.regex, sym_a);
1178            assert_eq!(term0.remaining.len(), 0);
1179            assert_eq!(term0.stack.len(), 1);
1180            assert_eq!(
1181                term0.stack[0].data,
1182                sed_rep(&expr_rep(c, expr_trm_noopt(sym_a)))
1183            );
1184            assert_eq!(term0.stack[0].remaining.len(), 1);
1185            assert_eq!(
1186                term0.stack[0].remaining[0],
1187                &expr_opt(c, expr_trm_noopt(sym_b))
1188            );
1189            let term1 = &lookahead.terminals[1];
1190            assert_eq!(term1.regex, sym_b);
1191            assert_eq!(term1.remaining.len(), 0);
1192            assert_eq!(term1.stack.len(), 0);
1193        });
1194
1195        harness.lookahead(
1196            "m : r? 'b' ; r : 'a' r? 'b' ;",
1197            "m",
1198            |lookahead, _c| {
1199                assert_matches!(lookahead.end, End::Illegal);
1200                assert!(!lookahead.empty);
1201                assert_eq!(lookahead.terminals.len(), 2);
1202                let term0 = &lookahead.terminals[0];
1203                assert_eq!(term0.regex, sym_a);
1204                assert_eq!(term0.remaining.len(), 2);
1205                assert_eq!(term0.stack.len(), 1);
1206                assert_eq!(term0.stack[0].data, sed_var(r_key));
1207                assert_eq!(term0.stack[0].remaining.len(), 1);
1208                assert_eq!(term0.stack[0].remaining[0], &expr_trm_noopt(sym_b));
1209                let term1 = &lookahead.terminals[1];
1210                assert_eq!(term1.regex, sym_b);
1211                assert_eq!(term1.remaining.len(), 0);
1212                assert_eq!(term1.stack.len(), 0);
1213            },
1214        );
1215    }
1216
1217    #[test]
1218    fn collect_left_recursion() {
1219        let mut harness = Harness::default();
1220        let sym_a = harness.symbol("a");
1221        let sym_b = harness.symbol("b");
1222        let sym_c = harness.symbol("c");
1223        let m_key = Key::basic(harness.symbol("m"));
1224        let r_key = Key::basic(harness.symbol("r"));
1225
1226        harness.lookahead("m : m ;", "m", |lookahead, _c| {
1227            assert_matches!(lookahead.end, End::Illegal);
1228            assert!(lookahead.empty);
1229            assert_eq!(lookahead.terminals.len(), 0);
1230        });
1231
1232        harness.lookahead("m : m 'a' | 'b';", "m", |lookahead, c| {
1233            assert_matches!(lookahead.end, End::Illegal);
1234            assert!(!lookahead.empty);
1235            assert_eq!(lookahead.terminals.len(), 1);
1236            let term0 = &lookahead.terminals[0];
1237            assert_eq!(term0.regex, sym_b);
1238            assert_eq!(term0.remaining.len(), 1);
1239            assert_eq!(term0.remaining[0], &expr_rep(c, expr_trm_noopt(sym_a)));
1240            assert_eq!(term0.stack.len(), 0);
1241        });
1242
1243        harness.lookahead("m : m 'a' | m 'b' | 'c';", "m", |lookahead, c| {
1244            assert_matches!(lookahead.end, End::Illegal);
1245            assert!(!lookahead.empty);
1246            assert_eq!(lookahead.terminals.len(), 1);
1247            let term0 = &lookahead.terminals[0];
1248            assert_eq!(term0.regex, sym_c);
1249            assert_eq!(term0.remaining.len(), 1);
1250            assert_eq!(
1251                term0.remaining[0],
1252                &expr_rep(
1253                    c,
1254                    expr_alt(
1255                        c,
1256                        &[expr_trm_noopt(sym_a), expr_trm_noopt(sym_b)]
1257                    )
1258                )
1259            );
1260            assert_eq!(term0.stack.len(), 0);
1261        });
1262
1263        harness.lookahead("m : r? m ; r : 'a' ;", "m", |lookahead, _c| {
1264            assert_matches!(lookahead.end, End::Illegal);
1265            assert!(lookahead.empty);
1266            assert_eq!(lookahead.terminals.len(), 1);
1267            let term0 = &lookahead.terminals[0];
1268            assert_eq!(term0.regex, sym_a);
1269            assert_eq!(term0.remaining.len(), 0);
1270            assert_eq!(term0.stack.len(), 1);
1271            assert_eq!(term0.stack[0].data, sed_var(r_key));
1272            assert_eq!(term0.stack[0].remaining.len(), 1);
1273            assert_eq!(term0.stack[0].remaining[0], &expr_var(m_key));
1274        });
1275    }
1276}