1use super::super::common::{Compiler, Symbol};
26use super::super::interpreter::{
27 Expression, Interpreted, Key, TerminalOptions,
28};
29use crate::sbnf::TextLocation;
30use hashbrown::HashMap;
31
32#[derive(Debug, Clone, PartialEq, Eq, Hash)]
33pub enum StackEntryData<'a> {
34 Variable { key: Key },
35 Repetition { expression: &'a Expression<'a> },
36}
37
38#[derive(Debug, Clone, PartialEq, Eq, Hash)]
39pub struct StackEntry<'a> {
40 pub data: StackEntryData<'a>,
41 pub remaining: Vec<&'a Expression<'a>>,
42}
43
44impl<'a> StackEntry<'a> {
45 pub fn with_compiler(
46 &'a self,
47 compiler: &'a Compiler,
48 ) -> StackEntryWithCompiler<'a> {
49 StackEntryWithCompiler { entry: self, compiler }
50 }
51}
52
53pub struct StackEntryWithCompiler<'a> {
54 entry: &'a StackEntry<'a>,
55 compiler: &'a Compiler,
56}
57
58impl std::fmt::Debug for StackEntryWithCompiler<'_> {
59 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
60 match self.entry.data {
61 StackEntryData::Variable { key } => {
62 write!(f, "var {}", key.with_compiler(self.compiler))?;
63 }
64 StackEntryData::Repetition { expression } => {
65 write!(
66 f,
67 "repetition {:?}",
68 expression.with_compiler(self.compiler)
69 )?;
70 }
71 }
72 if !self.entry.remaining.is_empty() {
73 write!(f, " [")?;
74 for expr in &self.entry.remaining {
75 write!(f, "{:?}, ", expr.with_compiler(self.compiler))?;
76 }
77 write!(f, "]")?;
78 }
79 Ok(())
80 }
81}
82
83#[derive(Debug, Clone, PartialEq, Eq)]
84pub struct Terminal<'a> {
85 pub regex: Symbol,
86 pub options: Option<&'a TerminalOptions<'a>>,
88 pub remaining: Vec<&'a Expression<'a>>,
89 pub stack: Vec<StackEntry<'a>>,
90}
91
92impl<'a> Terminal<'a> {
93 fn new(regex: Symbol, options: Option<&'a TerminalOptions>) -> Self {
94 Terminal { regex, options, remaining: vec![], stack: vec![] }
95 }
96
97 fn get_last_remaining(&mut self) -> &mut Vec<&'a Expression<'a>> {
98 self.get_mut_remaining(self.stack.len())
99 }
100
101 fn get_mut_remaining(
102 &mut self,
103 index: usize,
104 ) -> &mut Vec<&'a Expression<'a>> {
105 if index == 0 {
106 &mut self.remaining
107 } else {
108 &mut self.stack[index - 1].remaining
109 }
110 }
111
112 pub fn get_remaining(&self, index: usize) -> &Vec<&'a Expression<'a>> {
113 if index == 0 {
114 &self.remaining
115 } else {
116 &self.stack[index - 1].remaining
117 }
118 }
119
120 pub fn local_key(&self, topmost: Key) -> Key {
121 for entry in &self.stack {
122 if let StackEntryData::Variable { key } = &entry.data {
123 return *key;
124 }
125 }
126
127 topmost
128 }
129
130 pub fn iter<'b>(&'b self) -> TerminalStackIterator<'a, 'b> {
131 TerminalStackIterator {
132 terminal: self,
133 index: 0,
134 size: self.stack.len() + 1,
135 }
136 }
137
138 pub fn has_any_remaining(&self) -> bool {
139 if !self.remaining.is_empty() {
140 return true;
141 }
142
143 for entry in self.stack.iter().rev() {
144 if !entry.remaining.is_empty() {
145 return true;
146 }
147 }
148
149 false
150 }
151
152 pub fn with_compiler(
153 &'a self,
154 compiler: &'a Compiler,
155 ) -> TerminalWithCompiler<'a> {
156 TerminalWithCompiler { terminal: self, compiler }
157 }
158}
159
160impl std::hash::Hash for Terminal<'_> {
161 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
162 self.regex.hash(state);
163 self.remaining.hash(state);
164 self.stack.hash(state);
165 }
166}
167
168#[derive(Debug, Clone)]
169pub struct TerminalStackIterator<'a, 'b> {
170 terminal: &'b Terminal<'a>,
171 index: usize,
172 size: usize,
173}
174
175#[derive(Debug, Clone)]
176pub struct TerminalStackIteratorItem<'a, 'b> {
177 pub data: Option<&'b StackEntryData<'a>>,
178 pub remaining: &'b Vec<&'a Expression<'a>>,
179}
180
181impl<'a> TerminalStackIteratorItem<'a, '_> {
182 fn clone_stack_entry(&self) -> StackEntry<'a> {
183 StackEntry {
184 data: self.data.unwrap().clone(),
185 remaining: self.remaining.clone(),
186 }
187 }
188}
189
190impl<'a> PartialEq<StackEntry<'a>> for TerminalStackIteratorItem<'a, '_> {
191 fn eq(&self, other: &StackEntry<'a>) -> bool {
192 self.data.is_some_and(|d| *d == other.data)
193 && *self.remaining == other.remaining
194 }
195}
196
197impl<'a, 'b> TerminalStackIterator<'a, 'b> {
198 fn get_item(&self, index: usize) -> TerminalStackIteratorItem<'a, 'b> {
199 if index == 0 {
200 TerminalStackIteratorItem {
201 data: None,
202 remaining: &self.terminal.remaining,
203 }
204 } else {
205 let stack_entry = &self.terminal.stack[index - 1];
206
207 TerminalStackIteratorItem {
208 data: Some(&stack_entry.data),
209 remaining: &stack_entry.remaining,
210 }
211 }
212 }
213
214 fn _is_empty(&self) -> bool {
215 self.index == self.size
216 }
217}
218
219impl<'a, 'b> Iterator for TerminalStackIterator<'a, 'b> {
220 type Item = TerminalStackIteratorItem<'a, 'b>;
221
222 fn next(&mut self) -> Option<Self::Item> {
223 if self._is_empty() {
224 return None;
225 }
226
227 let result = self.get_item(self.index);
228 self.index += 1;
229 Some(result)
230 }
231
232 fn size_hint(&self) -> (usize, Option<usize>) {
233 let l = self.len();
234 (l, Some(l))
235 }
236}
237
238impl DoubleEndedIterator for TerminalStackIterator<'_, '_> {
239 fn next_back(&mut self) -> Option<Self::Item> {
240 if self._is_empty() {
241 return None;
242 }
243
244 self.size -= 1;
245 Some(self.get_item(self.size))
246 }
247}
248
249impl ExactSizeIterator for TerminalStackIterator<'_, '_> {
250 fn len(&self) -> usize {
251 self.size - self.index
252 }
253}
254
255pub struct TerminalWithCompiler<'a> {
256 terminal: &'a Terminal<'a>,
257 compiler: &'a Compiler,
258}
259
260impl std::fmt::Debug for TerminalWithCompiler<'_> {
261 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
262 writeln!(
263 f,
264 "match {} {:?}",
265 self.compiler.resolve_symbol(self.terminal.regex),
266 self.terminal.options
267 )?;
268 if !self.terminal.remaining.is_empty() {
269 write!(f, "remaining [")?;
270 for expr in &self.terminal.remaining {
271 write!(f, "{:?}, ", expr.with_compiler(self.compiler))?;
272 }
273 writeln!(f, "]")?;
274 }
275 if !self.terminal.stack.is_empty() {
276 writeln!(f, "stack")?;
277 for entry in &self.terminal.stack {
278 writeln!(f, "* {:?}", entry.with_compiler(self.compiler))?;
279 }
280 }
281 Ok(())
282 }
283}
284
285#[derive(Debug, Clone, PartialEq, Eq, Hash)]
288pub enum End<'a> {
289 Illegal,
291 None,
293 Push(Box<Lookahead<'a>>),
295}
296
297#[derive(Debug, Clone, PartialEq, Eq, Hash)]
298pub struct Lookahead<'a> {
299 pub terminals: Vec<Terminal<'a>>,
300 pub end: End<'a>,
301 pub empty: bool,
302}
303
304impl<'a> Lookahead<'a> {
305 fn append(&mut self, mut other: Self) {
306 self.terminals.append(&mut other.terminals);
307
308 self.empty = self.empty || other.empty;
309
310 match self.end {
311 End::Illegal => {
312 self.end = other.end;
313 }
314 End::None => {
315 self.end = match other.end {
316 End::Illegal | End::None => End::None,
317 End::Push(ref mut _push) => {
318 other.end
320 }
321 };
322 }
323 End::Push(ref mut self_push) => {
324 match other.end {
325 End::Illegal | End::None => {
326 }
328 End::Push(other_push) => {
329 self_push.append(*other_push);
330 }
331 }
332 }
333 }
334 }
335
336 fn concatenate(&mut self, mut next: Self) {
337 assert!(self.empty);
339
340 self.end = match self.end {
341 End::Illegal => match next.end {
342 End::Illegal => End::Illegal,
343 End::None => End::Push(Box::new(Lookahead {
344 terminals: next.terminals.clone(),
345 end: End::None,
346 empty: next.empty,
347 })),
348 End::Push(p) => End::Push(p),
349 },
350 End::None => match next.end {
351 End::Illegal => End::Push(Box::new(Lookahead {
352 terminals: self.terminals.clone(),
353 end: End::None,
354 empty: next.empty,
355 })),
356 End::None => End::None,
357 _ => todo!(),
358 },
359 _ => todo!(),
360 };
361
362 self.terminals.append(&mut next.terminals);
363 self.empty = next.empty;
364 }
365
366 pub fn with_compiler(
367 &'a self,
368 compiler: &'a Compiler,
369 ) -> LookaheadWithCompiler<'a> {
370 LookaheadWithCompiler { lookahead: self, compiler }
371 }
372}
373
374pub struct LookaheadWithCompiler<'a> {
375 lookahead: &'a Lookahead<'a>,
376 compiler: &'a Compiler,
377}
378
379impl std::fmt::Debug for LookaheadWithCompiler<'_> {
380 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
381 writeln!(
382 f,
383 "LOOKAHEAD empty:{:?} end:{:?}",
384 self.lookahead.empty, self.lookahead.end
385 )?;
386 for terminal in &self.lookahead.terminals {
387 writeln!(f, "{:?}", terminal.with_compiler(self.compiler))?;
388 }
389 write!(f, "LOOKAHEAD END")
390 }
391}
392
393pub struct LookaheadState<'a> {
394 visited_variables: HashMap<Key, bool>,
395 pub compiler: &'a Compiler,
396}
397
398impl<'a> LookaheadState<'a> {
399 pub fn new(compiler: &'a Compiler) -> LookaheadState<'a> {
400 LookaheadState { visited_variables: HashMap::new(), compiler }
401 }
402
403 pub fn push_variable(&mut self, key: Key) -> Option<Lookahead<'a>> {
404 if let Some(left_recursion) = self.visited_variables.get_mut(&key) {
406 *left_recursion = true;
407
408 Some(Lookahead {
410 terminals: vec![Terminal::new(key.as_symbol(), None)],
411 end: End::Illegal,
412 empty: false,
413 })
414 } else {
415 self.visited_variables.insert(key, false);
416 None
417 }
418 }
419
420 pub fn pop_variable(&mut self, key: Key, lookahead: &mut Lookahead<'a>) {
421 if self.visited_variables.remove(&key).unwrap() {
423 let left_recursion_terminals = {
425 let mut result = vec![];
426 let mut i = 0;
427 while i < lookahead.terminals.len() {
428 if lookahead.terminals[i].options.is_none() {
429 result.push(lookahead.terminals.remove(i));
430 } else {
431 i += 1;
432 }
433 }
434 result
435 };
436
437 let expressions = left_recursion_terminals
442 .into_iter()
443 .filter_map(|rt| {
444 if rt.remaining.len() > 1 {
445 Some(Expression::Concatenation {
446 expressions: self
447 .compiler
448 .allocator
449 .alloc_slice_fill_iter(
450 rt.remaining.iter().map(|e| (*e).clone()),
451 ),
452 location: TextLocation::invalid(),
453 })
454 } else if rt.remaining.len() == 1 {
455 Some((*rt.remaining[0]).clone())
456 } else {
457 None
458 }
459 })
460 .collect::<Vec<_>>();
461 let expressions =
462 self.compiler.allocator.alloc_slice_fill_iter(expressions);
463
464 if !expressions.is_empty() {
465 let expression = if expressions.len() > 1 {
466 self.compiler.allocator.alloc(Expression::Alternation {
467 expressions,
468 location: TextLocation::invalid(),
469 })
470 } else {
471 expressions.iter_mut().next().unwrap()
472 };
473 let rep =
474 self.compiler.allocator.alloc(Expression::Repetition {
475 expression,
476 location: TextLocation::invalid(),
477 });
478
479 for term in &mut lookahead.terminals {
480 term.remaining.insert(0, rep);
481 }
482 } else {
483 lookahead.empty = true;
484 }
485 }
486 }
487}
488
489pub fn lookahead<'a>(
491 interpreted: &Interpreted<'a>,
492 expression: &'a Expression,
493 state: &mut LookaheadState<'a>,
494) -> Lookahead<'a> {
495 match expression {
496 Expression::Variable { key, .. } => {
497 if let Some(lookahead) = state.push_variable(*key) {
498 return lookahead;
499 }
500
501 let rule = interpreted.rules.get(key).unwrap();
502
503 let mut la = lookahead(interpreted, rule.expression, state);
504
505 state.pop_variable(*key, &mut la);
506
507 for term in &mut la.terminals {
509 term.stack.push(StackEntry {
510 data: StackEntryData::Variable { key: *key },
511 remaining: vec![],
512 });
513 }
514
515 la
516 }
517 Expression::Terminal { regex, options, .. } => Lookahead {
518 terminals: vec![Terminal::new(*regex, Some(options))],
519 end: End::Illegal,
520 empty: false,
521 },
522 Expression::Passive { expression, .. } => {
523 let mut la = lookahead(interpreted, expression, state);
524 la.end = End::None;
525 la
526 }
527 Expression::Repetition { expression: child, .. } => {
528 let mut la = lookahead(interpreted, child, state);
529
530 for term in &mut la.terminals {
532 term.stack.push(StackEntry {
533 data: StackEntryData::Repetition { expression },
534 remaining: vec![],
535 });
536 }
537
538 la.empty = true;
540
541 la
542 }
543 Expression::Optional { expression: child, .. } => {
544 let la = lookahead(interpreted, child, state);
545
546 match la.end {
547 End::Illegal => Lookahead {
548 terminals: la.terminals,
549 end: la.end,
550 empty: true,
551 },
552 End::None | End::Push(_) => Lookahead {
553 terminals: vec![],
554 end: End::Push(Box::new(la)),
555 empty: true,
556 },
557 }
558 }
559 Expression::Alternation { expressions, .. } => {
560 let mut la = Lookahead {
561 terminals: vec![],
562 end: End::Illegal,
563 empty: false,
564 };
565
566 for expression in *expressions {
567 la.append(lookahead(interpreted, expression, state));
568 }
569
570 la
571 }
572 Expression::Concatenation { expressions, .. } => {
573 lookahead_concatenation(interpreted, expressions.iter(), state)
574 }
575 }
576}
577
578pub fn lookahead_concatenation<'a, I>(
581 interpreted: &Interpreted<'a>,
582 mut expressions: I,
583 state: &mut LookaheadState<'a>,
584) -> Lookahead<'a>
585where
586 I: std::iter::Iterator<Item = &'a Expression<'a>>,
587 I: std::clone::Clone,
588{
589 let mut f = |expr, remaining: I| {
590 let mut l: Lookahead<'a> = lookahead(interpreted, expr, state);
591
592 for terminal in &mut l.terminals {
593 terminal.get_last_remaining().extend(remaining.clone());
594 }
595 l
596 };
597
598 let mut la = f(expressions.next().unwrap(), expressions.clone());
599
600 if !la.empty {
601 return la;
602 }
603
604 while let Some(expression) = expressions.next() {
605 let l = f(expression, expressions.clone());
606
607 la.concatenate(l);
608
609 if !la.empty {
610 break;
611 }
612 }
613
614 la
615}
616
617pub fn advance_terminal<'a>(
619 interpreted: &Interpreted<'a>,
620 terminal: &Terminal<'a>,
621 compiler: &'a Compiler,
622) -> Option<Lookahead<'a>> {
623 let mut iter = terminal.iter();
624
625 let mut lookahead = None;
626
627 while let Some(entry) = iter.next() {
628 if entry.remaining.is_empty() {
629 continue;
630 }
631
632 let mut las = LookaheadState::new(compiler);
633
634 let mut la = match &entry.data {
635 Some(StackEntryData::Repetition { expression }) => {
636 lookahead_concatenation(
637 interpreted,
638 [expression]
639 .iter()
640 .cloned()
641 .chain(entry.remaining.iter())
642 .cloned(),
643 &mut las,
644 )
645 }
646 _ => lookahead_concatenation(
647 interpreted,
648 entry.remaining.iter().cloned(),
649 &mut las,
650 ),
651 };
652
653 for t in &mut la.terminals {
654 if !t.stack.is_empty()
657 && iter.clone().take(t.stack.len()).eq(t.stack.iter().cloned())
658 {
659 t.stack.extend(
660 iter.clone()
661 .take(t.stack.len())
662 .map(|e| e.clone_stack_entry()),
663 );
664 continue;
665 }
666
667 t.stack.extend(iter.clone().map(|e| e.clone_stack_entry()));
668 }
669
670 match &mut lookahead {
671 None => {
672 lookahead = Some(la);
673 }
674 Some(lookahead) => {
675 lookahead.concatenate(la);
676 }
677 }
678
679 if !lookahead.as_ref().unwrap().empty {
680 break;
681 }
682 }
683
684 lookahead
685}
686
687#[cfg(test)]
688mod tests {
689 extern crate matches;
690 use matches::assert_matches;
691
692 use super::*;
693 use crate::compiler::interpreter::tests::*;
694 use crate::compiler::{collector, interpreter, CompileOptions, Compiler};
695 use crate::sbnf;
696
697 #[derive(Default)]
698 struct Harness {
699 compiler: Compiler,
700 }
701
702 impl Harness {
703 fn symbol(&self, name: &str) -> Symbol {
704 self.compiler.get_symbol(name)
705 }
706
707 fn lookahead<F>(&mut self, source: &str, rule_name: &str, fun: F)
708 where
709 F: Fn(Lookahead, &Compiler),
710 {
711 let grammar =
712 sbnf::parse(source, &self.compiler.allocator).unwrap();
713
714 let options = CompileOptions {
715 name_hint: Some("test"),
716 arguments: vec![],
717 debug_contexts: false,
718 entry_points: vec!["m"],
719 };
720
721 let collection =
722 collector::collect(&self.compiler, &options, &grammar);
723 assert!(collection.warnings.is_empty());
724
725 let collection = collection.result.unwrap();
726
727 let interpreter_result =
728 interpreter::interpret(&self.compiler, &options, collection);
729 assert!(interpreter_result.warnings.is_empty());
730
731 let interpreted = interpreter_result.result.as_ref().unwrap();
732
733 let key = interpreter::Key::basic(self.symbol(rule_name));
734 let rule = &interpreted.rules[&key];
735
736 let mut lookahead_state = LookaheadState::new(&self.compiler);
737 assert!(lookahead_state.push_variable(key).is_none());
738
739 let mut la =
740 lookahead(interpreted, rule.expression, &mut lookahead_state);
741
742 lookahead_state.pop_variable(key, &mut la);
743
744 println!("{:?}", la.with_compiler(&self.compiler));
745 fun(la, &self.compiler);
746 }
747 }
748
749 fn sed_rep<'a>(expression: &'a Expression) -> StackEntryData<'a> {
750 StackEntryData::Repetition { expression }
751 }
752
753 fn sed_var<'a>(key: Key) -> StackEntryData<'a> {
754 StackEntryData::Variable { key }
755 }
756
757 #[test]
758 fn collect_passive() {
759 let mut harness = Harness::default();
760 let sym_a = harness.symbol("a");
761 let sym_b = harness.symbol("b");
762 let sym_c = harness.symbol("c");
763
764 harness.lookahead("m : ~'a';", "m", |lookahead, _c| {
765 assert_eq!(lookahead.end, End::None);
766 assert!(!lookahead.empty);
767 assert_eq!(lookahead.terminals.len(), 1);
768 assert_eq!(lookahead.terminals[0].regex, sym_a);
769 assert_eq!(lookahead.terminals[0].remaining.len(), 0);
770 assert_eq!(lookahead.terminals[0].stack.len(), 0);
771 });
772
773 harness.lookahead("m : ~'a' 'b';", "m", |lookahead, _c| {
774 assert_eq!(lookahead.end, End::None);
775 assert!(!lookahead.empty);
776 assert_eq!(lookahead.terminals.len(), 1);
777 let term0 = &lookahead.terminals[0];
778 assert_eq!(term0.regex, sym_a);
779 assert_eq!(term0.remaining.len(), 1);
780 assert_eq!(term0.remaining[0], &expr_trm_noopt(sym_b));
781 assert_eq!(term0.stack.len(), 0);
782 });
783
784 harness.lookahead("m : ~'a'* ~'b';", "m", |lookahead, c| {
785 assert_eq!(lookahead.end, End::None);
786 assert!(!lookahead.empty);
787 assert_eq!(lookahead.terminals.len(), 2);
788 let term0 = &lookahead.terminals[0];
789 assert_eq!(term0.regex, sym_a);
790 assert_eq!(term0.remaining.len(), 0);
791 assert_eq!(term0.stack.len(), 1);
792 assert_eq!(
793 term0.stack[0].data,
794 sed_rep(&expr_rep(c, expr_trm_noopt(sym_a)))
795 );
796 assert_eq!(term0.stack[0].remaining.len(), 1);
797 assert_eq!(
798 term0.stack[0].remaining[0],
799 &expr_pas(c, expr_trm_noopt(sym_b))
800 );
801 let term1 = &lookahead.terminals[1];
802 assert_eq!(term1.regex, sym_b);
803 assert_eq!(term1.remaining.len(), 0);
804 assert_eq!(term1.stack.len(), 0);
805 });
806
807 harness.lookahead("m : (~'a')* ~'b';", "m", |lookahead, c| {
808 assert_eq!(lookahead.end, End::None);
809 assert!(!lookahead.empty);
810 assert_eq!(lookahead.terminals.len(), 2);
811 let term0 = &lookahead.terminals[0];
812 assert_eq!(term0.regex, sym_a);
813 assert_eq!(term0.remaining.len(), 0);
814 assert_eq!(term0.stack.len(), 1);
815 assert_eq!(
816 term0.stack[0].data,
817 sed_rep(&expr_rep(c, expr_pas(c, expr_trm_noopt(sym_a))))
818 );
819 assert_eq!(term0.stack[0].remaining.len(), 1);
820 assert_eq!(
821 term0.stack[0].remaining[0],
822 &expr_pas(c, expr_trm_noopt(sym_b))
823 );
824 let term1 = &lookahead.terminals[1];
825 assert_eq!(term1.regex, sym_b);
826 assert_eq!(term1.remaining.len(), 0);
827 assert_eq!(term1.stack.len(), 0);
828 });
829
830 harness.lookahead("m : ~('a' | 'b') 'c';", "m", |lookahead, _c| {
831 assert_matches!(lookahead.end, End::None);
832 assert!(!lookahead.empty);
833 assert_eq!(lookahead.terminals.len(), 2);
834 let term0 = &lookahead.terminals[0];
835 assert_eq!(term0.regex, sym_a);
836 assert_eq!(term0.remaining.len(), 1);
837 assert_eq!(term0.remaining[0], &expr_trm_noopt(sym_c));
838 assert_eq!(term0.stack.len(), 0);
839 let term1 = &lookahead.terminals[1];
840 assert_eq!(term1.regex, sym_b);
841 assert_eq!(term1.remaining.len(), 1);
842 assert_eq!(term1.remaining[0], &expr_trm_noopt(sym_c));
843 assert_eq!(term1.stack.len(), 0);
844 });
845
846 harness.lookahead("m : ~'a'?;", "m", |lookahead, _c| {
847 assert_matches!(lookahead.end, End::None);
848 assert!(lookahead.empty);
849 assert_eq!(lookahead.terminals.len(), 1);
850 let term0 = &lookahead.terminals[0];
851 assert_eq!(term0.regex, sym_a);
852 assert_eq!(term0.remaining.len(), 0);
853 assert_eq!(term0.stack.len(), 0);
854 });
855
856 harness.lookahead("m : (~'a')?;", "m", |lookahead, _c| {
857 match lookahead.end {
858 End::Push(lookahead) => {
859 assert_matches!(lookahead.end, End::None);
860 assert!(!lookahead.empty);
861 assert_eq!(lookahead.terminals.len(), 1);
862 let term0 = &lookahead.terminals[0];
863 assert_eq!(term0.regex, sym_a);
864 assert_eq!(term0.remaining.len(), 0);
865 assert_eq!(term0.stack.len(), 0);
866 }
867 _ => panic!(),
868 }
869 assert!(lookahead.empty);
870 assert!(lookahead.terminals.is_empty());
872 });
873
874 harness.lookahead("m : (~'a')* 'b';", "m", |lookahead, c| {
875 match lookahead.end {
876 End::Push(lookahead) => {
877 assert_matches!(lookahead.end, End::None);
878 assert!(!lookahead.empty);
879 assert_eq!(lookahead.terminals.len(), 1);
880 let term0 = &lookahead.terminals[0];
881 assert_eq!(term0.regex, sym_a);
882 assert_eq!(term0.remaining.len(), 0);
883 assert_eq!(term0.stack.len(), 1);
884 assert_eq!(
885 term0.stack[0].data,
886 sed_rep(&expr_rep(
887 c,
888 expr_pas(c, expr_trm_noopt(sym_a))
889 ))
890 );
891 assert_eq!(term0.stack[0].remaining.len(), 1);
892 assert_eq!(
893 term0.stack[0].remaining[0],
894 &expr_trm_noopt(sym_b)
895 );
896 }
897 _ => panic!(),
898 }
899 assert!(!lookahead.empty);
900 assert_eq!(lookahead.terminals.len(), 2);
901 let term0 = &lookahead.terminals[0];
902 assert_eq!(term0.regex, sym_a);
903 assert_eq!(term0.remaining.len(), 0);
904 assert_eq!(term0.stack.len(), 1);
905 assert_eq!(
906 term0.stack[0].data,
907 sed_rep(&expr_rep(c, expr_pas(c, expr_trm_noopt(sym_a))))
908 );
909 assert_eq!(term0.stack[0].remaining.len(), 1);
910 assert_eq!(term0.stack[0].remaining[0], &expr_trm_noopt(sym_b));
911 let term1 = &lookahead.terminals[1];
912 assert_eq!(term1.regex, sym_b);
913 assert_eq!(term1.remaining.len(), 0);
914 assert_eq!(term1.stack.len(), 0);
915 });
916
917 harness.lookahead("m : 'a'? ~'b';", "m", |lookahead, c| {
918 match lookahead.end {
919 End::Push(lookahead) => {
920 assert_matches!(lookahead.end, End::None);
921 assert!(!lookahead.empty);
922 assert_eq!(lookahead.terminals.len(), 1);
923 let term0 = &lookahead.terminals[0];
924 assert_eq!(term0.regex, sym_b);
925 assert_eq!(term0.remaining.len(), 0);
926 assert_eq!(term0.stack.len(), 0);
927 }
928 _ => panic!(),
929 }
930 assert!(!lookahead.empty);
931 assert_eq!(lookahead.terminals.len(), 2);
932 let term0 = &lookahead.terminals[0];
933 assert_eq!(term0.regex, sym_a);
934 assert_eq!(term0.remaining.len(), 1);
935 assert_eq!(term0.remaining[0], &expr_pas(c, expr_trm_noopt(sym_b)));
936 assert_eq!(term0.stack.len(), 0);
937 let term1 = &lookahead.terminals[1];
938 assert_eq!(term1.regex, sym_b);
939 assert_eq!(term1.remaining.len(), 0);
940 assert_eq!(term1.stack.len(), 0);
941 });
942 }
943
944 #[test]
945 fn collect_repetition() {
946 let mut harness = Harness::default();
947 let sym_a = harness.symbol("a");
948 let sym_b = harness.symbol("b");
949 let sym_c = harness.symbol("c");
950
951 harness.lookahead("m : 'a'*;", "m", |lookahead, c| {
952 assert_matches!(lookahead.end, End::Illegal);
953 assert!(lookahead.empty);
954 assert_eq!(lookahead.terminals.len(), 1);
955 let term0 = &lookahead.terminals[0];
956 assert_eq!(term0.regex, sym_a);
957 assert_eq!(term0.remaining.len(), 0);
958 assert_eq!(term0.stack.len(), 1);
959 assert_eq!(
960 term0.stack[0].data,
961 sed_rep(&expr_rep(c, expr_trm_noopt(sym_a)))
962 );
963 assert_eq!(term0.stack[0].remaining.len(), 0);
964 });
965
966 harness.lookahead("m : ('a'? 'b' | 'c')*;", "m", |lookahead, c| {
967 let rep = expr_rep(
968 c,
969 expr_alt(
970 c,
971 &[
972 expr_cat(
973 c,
974 &[
975 expr_opt(c, expr_trm_noopt(sym_a)),
976 expr_trm_noopt(sym_b),
977 ],
978 ),
979 expr_trm_noopt(sym_c),
980 ],
981 ),
982 );
983
984 assert_matches!(lookahead.end, End::Illegal);
985 assert!(lookahead.empty);
986 assert_eq!(lookahead.terminals.len(), 3);
987 let term0 = &lookahead.terminals[0];
988 assert_eq!(term0.regex, sym_a);
989 assert_eq!(term0.remaining.len(), 1);
990 assert_eq!(term0.remaining[0], &expr_trm_noopt(sym_b));
991 assert_eq!(term0.stack.len(), 1);
992 assert_eq!(term0.stack[0].data, sed_rep(&rep));
993 assert_eq!(term0.stack[0].remaining.len(), 0);
994 let term1 = &lookahead.terminals[1];
995 assert_eq!(term1.regex, sym_b);
996 assert_eq!(term1.remaining.len(), 0);
997 assert_eq!(term1.stack.len(), 1);
998 assert_eq!(term1.stack[0].data, sed_rep(&rep));
999 assert_eq!(term1.stack[0].remaining.len(), 0);
1000 let term2 = &lookahead.terminals[2];
1001 assert_eq!(term2.regex, sym_c);
1002 assert_eq!(term2.remaining.len(), 0);
1003 assert_eq!(term2.stack.len(), 1);
1004 assert_eq!(term2.stack[0].data, sed_rep(&rep));
1005 assert_eq!(term2.stack[0].remaining.len(), 0);
1006 });
1007 }
1008
1009 #[test]
1010 fn collect_optional() {
1011 let mut harness = Harness::default();
1012 let sym_a = harness.symbol("a");
1013 let sym_b = harness.symbol("b");
1014 let sym_c = harness.symbol("c");
1015
1016 harness.lookahead("m : 'a'?;", "m", |lookahead, _c| {
1017 assert_matches!(lookahead.end, End::Illegal);
1018 assert!(lookahead.empty);
1019 assert_eq!(lookahead.terminals.len(), 1);
1020 let term0 = &lookahead.terminals[0];
1021 assert_eq!(term0.regex, sym_a);
1022 assert_eq!(term0.remaining.len(), 0);
1023 assert_eq!(term0.stack.len(), 0);
1024 });
1025
1026 harness.lookahead("m : ('a' | 'b'* 'c')?;", "m", |lookahead, c| {
1027 assert_matches!(lookahead.end, End::Illegal);
1028 assert!(lookahead.empty);
1029 assert_eq!(lookahead.terminals.len(), 3);
1030 let term0 = &lookahead.terminals[0];
1031 assert_eq!(term0.regex, sym_a);
1032 assert_eq!(term0.remaining.len(), 0);
1033 assert_eq!(term0.stack.len(), 0);
1034 let term1 = &lookahead.terminals[1];
1035 assert_eq!(term1.regex, sym_b);
1036 assert_eq!(term1.remaining.len(), 0);
1037 assert_eq!(term1.stack.len(), 1);
1038 assert_eq!(
1039 term1.stack[0].data,
1040 sed_rep(&expr_rep(c, expr_trm_noopt(sym_b)))
1041 );
1042 assert_eq!(term1.stack[0].remaining.len(), 1);
1043 assert_eq!(term1.stack[0].remaining[0], &expr_trm_noopt(sym_c));
1044 let term2 = &lookahead.terminals[2];
1045 assert_eq!(term2.regex, sym_c);
1046 assert_eq!(term2.remaining.len(), 0);
1047 assert_eq!(term2.stack.len(), 0);
1048 });
1049 }
1050
1051 #[test]
1052 fn collect_alternation() {
1053 let mut harness = Harness::default();
1054 let sym_a = harness.symbol("a");
1055 let sym_b = harness.symbol("b");
1056 let sym_c = harness.symbol("c");
1057
1058 harness.lookahead("m : 'a' | 'b';", "m", |lookahead, _c| {
1059 assert_matches!(lookahead.end, End::Illegal);
1060 assert!(!lookahead.empty);
1061 assert_eq!(lookahead.terminals.len(), 2);
1062 let term0 = &lookahead.terminals[0];
1063 assert_eq!(term0.regex, sym_a);
1064 assert_eq!(term0.remaining.len(), 0);
1065 assert_eq!(term0.stack.len(), 0);
1066 let term1 = &lookahead.terminals[1];
1067 assert_eq!(term1.regex, sym_b);
1068 assert_eq!(term1.remaining.len(), 0);
1069 assert_eq!(term1.stack.len(), 0);
1070 });
1071
1072 harness.lookahead("m : 'a' | 'b' 'c';", "m", |lookahead, _c| {
1073 assert_matches!(lookahead.end, End::Illegal);
1074 assert!(!lookahead.empty);
1075 assert_eq!(lookahead.terminals.len(), 2);
1076 let term0 = &lookahead.terminals[0];
1077 assert_eq!(term0.regex, sym_a);
1078 assert_eq!(term0.remaining.len(), 0);
1079 assert_eq!(term0.stack.len(), 0);
1080 let term1 = &lookahead.terminals[1];
1081 assert_eq!(term1.regex, sym_b);
1082 assert_eq!(term1.remaining.len(), 1);
1083 assert_eq!(term1.remaining[0], &expr_trm_noopt(sym_c));
1084 assert_eq!(term1.stack.len(), 0);
1085 });
1086
1087 harness.lookahead("m : 'a'? | 'b' | 'c'*;", "m", |lookahead, c| {
1088 assert_matches!(lookahead.end, End::Illegal);
1089 assert!(lookahead.empty);
1090 assert_eq!(lookahead.terminals.len(), 3);
1091 let term0 = &lookahead.terminals[0];
1092 assert_eq!(term0.regex, sym_a);
1093 assert_eq!(term0.remaining.len(), 0);
1094 assert_eq!(term0.stack.len(), 0);
1095 let term1 = &lookahead.terminals[1];
1096 assert_eq!(term1.regex, sym_b);
1097 assert_eq!(term1.remaining.len(), 0);
1098 assert_eq!(term1.stack.len(), 0);
1099 let term2 = &lookahead.terminals[2];
1100 assert_eq!(term2.regex, sym_c);
1101 assert_eq!(term2.remaining.len(), 0);
1102 assert_eq!(term2.stack.len(), 1);
1103 assert_eq!(
1104 term2.stack[0].data,
1105 sed_rep(&expr_rep(c, expr_trm_noopt(sym_c)))
1106 );
1107 assert_eq!(term2.stack[0].remaining.len(), 0);
1108 });
1109 }
1110
1111 #[test]
1112 fn collect_concat() {
1113 let mut harness = Harness::default();
1114 let sym_a = harness.symbol("a");
1115 let sym_b = harness.symbol("b");
1116 let sym_c = harness.symbol("c");
1117 let r_key = Key::basic(harness.symbol("r"));
1118
1119 harness.lookahead("m : 'a' 'b';", "m", |lookahead, _c| {
1120 assert_matches!(lookahead.end, End::Illegal);
1121 assert!(!lookahead.empty);
1122 assert_eq!(lookahead.terminals.len(), 1);
1123 let term0 = &lookahead.terminals[0];
1124 assert_eq!(term0.regex, sym_a);
1125 assert_eq!(term0.remaining.len(), 1);
1126 assert_eq!(term0.remaining[0], &expr_trm_noopt(sym_b));
1127 assert_eq!(term0.stack.len(), 0);
1128 });
1129
1130 harness.lookahead("m : ('a' | 'b') 'c';", "m", |lookahead, _c| {
1131 assert_matches!(lookahead.end, End::Illegal);
1132 assert!(!lookahead.empty);
1133 assert_eq!(lookahead.terminals.len(), 2);
1134 let term0 = &lookahead.terminals[0];
1135 assert_eq!(term0.regex, sym_a);
1136 assert_eq!(term0.remaining.len(), 1);
1137 assert_eq!(term0.remaining[0], &expr_trm_noopt(sym_c));
1138 assert_eq!(term0.stack.len(), 0);
1139 let term1 = &lookahead.terminals[1];
1140 assert_eq!(term1.regex, sym_b);
1141 assert_eq!(term1.remaining.len(), 1);
1142 assert_eq!(term1.remaining[0], &expr_trm_noopt(sym_c));
1143 assert_eq!(term1.stack.len(), 0);
1144 });
1145
1146 harness.lookahead("m : 'a'? 'b'* 'c';", "m", |lookahead, c| {
1147 assert_matches!(lookahead.end, End::Illegal);
1148 assert!(!lookahead.empty);
1149 assert_eq!(lookahead.terminals.len(), 3);
1150 let term0 = &lookahead.terminals[0];
1151 assert_eq!(term0.regex, sym_a);
1152 assert_eq!(term0.remaining.len(), 2);
1153 assert_eq!(term0.remaining[0], &expr_rep(c, expr_trm_noopt(sym_b)));
1154 assert_eq!(term0.remaining[1], &expr_trm_noopt(sym_c));
1155 assert_eq!(term0.stack.len(), 0);
1156 let term1 = &lookahead.terminals[1];
1157 assert_eq!(term1.regex, sym_b);
1158 assert_eq!(term1.remaining.len(), 0);
1159 assert_eq!(term1.stack.len(), 1);
1160 assert_eq!(
1161 term1.stack[0].data,
1162 sed_rep(&expr_rep(c, expr_trm_noopt(sym_b)))
1163 );
1164 assert_eq!(term1.stack[0].remaining.len(), 1);
1165 assert_eq!(term1.stack[0].remaining[0], &expr_trm_noopt(sym_c));
1166 let term2 = &lookahead.terminals[2];
1167 assert_eq!(term2.regex, sym_c);
1168 assert_eq!(term2.remaining.len(), 0);
1169 assert_eq!(term2.stack.len(), 0);
1170 });
1171
1172 harness.lookahead("m : 'a'* 'b'?;", "m", |lookahead, c| {
1173 assert_matches!(lookahead.end, End::Illegal);
1174 assert!(lookahead.empty);
1175 assert_eq!(lookahead.terminals.len(), 2);
1176 let term0 = &lookahead.terminals[0];
1177 assert_eq!(term0.regex, sym_a);
1178 assert_eq!(term0.remaining.len(), 0);
1179 assert_eq!(term0.stack.len(), 1);
1180 assert_eq!(
1181 term0.stack[0].data,
1182 sed_rep(&expr_rep(c, expr_trm_noopt(sym_a)))
1183 );
1184 assert_eq!(term0.stack[0].remaining.len(), 1);
1185 assert_eq!(
1186 term0.stack[0].remaining[0],
1187 &expr_opt(c, expr_trm_noopt(sym_b))
1188 );
1189 let term1 = &lookahead.terminals[1];
1190 assert_eq!(term1.regex, sym_b);
1191 assert_eq!(term1.remaining.len(), 0);
1192 assert_eq!(term1.stack.len(), 0);
1193 });
1194
1195 harness.lookahead(
1196 "m : r? 'b' ; r : 'a' r? 'b' ;",
1197 "m",
1198 |lookahead, _c| {
1199 assert_matches!(lookahead.end, End::Illegal);
1200 assert!(!lookahead.empty);
1201 assert_eq!(lookahead.terminals.len(), 2);
1202 let term0 = &lookahead.terminals[0];
1203 assert_eq!(term0.regex, sym_a);
1204 assert_eq!(term0.remaining.len(), 2);
1205 assert_eq!(term0.stack.len(), 1);
1206 assert_eq!(term0.stack[0].data, sed_var(r_key));
1207 assert_eq!(term0.stack[0].remaining.len(), 1);
1208 assert_eq!(term0.stack[0].remaining[0], &expr_trm_noopt(sym_b));
1209 let term1 = &lookahead.terminals[1];
1210 assert_eq!(term1.regex, sym_b);
1211 assert_eq!(term1.remaining.len(), 0);
1212 assert_eq!(term1.stack.len(), 0);
1213 },
1214 );
1215 }
1216
1217 #[test]
1218 fn collect_left_recursion() {
1219 let mut harness = Harness::default();
1220 let sym_a = harness.symbol("a");
1221 let sym_b = harness.symbol("b");
1222 let sym_c = harness.symbol("c");
1223 let m_key = Key::basic(harness.symbol("m"));
1224 let r_key = Key::basic(harness.symbol("r"));
1225
1226 harness.lookahead("m : m ;", "m", |lookahead, _c| {
1227 assert_matches!(lookahead.end, End::Illegal);
1228 assert!(lookahead.empty);
1229 assert_eq!(lookahead.terminals.len(), 0);
1230 });
1231
1232 harness.lookahead("m : m 'a' | 'b';", "m", |lookahead, c| {
1233 assert_matches!(lookahead.end, End::Illegal);
1234 assert!(!lookahead.empty);
1235 assert_eq!(lookahead.terminals.len(), 1);
1236 let term0 = &lookahead.terminals[0];
1237 assert_eq!(term0.regex, sym_b);
1238 assert_eq!(term0.remaining.len(), 1);
1239 assert_eq!(term0.remaining[0], &expr_rep(c, expr_trm_noopt(sym_a)));
1240 assert_eq!(term0.stack.len(), 0);
1241 });
1242
1243 harness.lookahead("m : m 'a' | m 'b' | 'c';", "m", |lookahead, c| {
1244 assert_matches!(lookahead.end, End::Illegal);
1245 assert!(!lookahead.empty);
1246 assert_eq!(lookahead.terminals.len(), 1);
1247 let term0 = &lookahead.terminals[0];
1248 assert_eq!(term0.regex, sym_c);
1249 assert_eq!(term0.remaining.len(), 1);
1250 assert_eq!(
1251 term0.remaining[0],
1252 &expr_rep(
1253 c,
1254 expr_alt(
1255 c,
1256 &[expr_trm_noopt(sym_a), expr_trm_noopt(sym_b)]
1257 )
1258 )
1259 );
1260 assert_eq!(term0.stack.len(), 0);
1261 });
1262
1263 harness.lookahead("m : r? m ; r : 'a' ;", "m", |lookahead, _c| {
1264 assert_matches!(lookahead.end, End::Illegal);
1265 assert!(lookahead.empty);
1266 assert_eq!(lookahead.terminals.len(), 1);
1267 let term0 = &lookahead.terminals[0];
1268 assert_eq!(term0.regex, sym_a);
1269 assert_eq!(term0.remaining.len(), 0);
1270 assert_eq!(term0.stack.len(), 1);
1271 assert_eq!(term0.stack[0].data, sed_var(r_key));
1272 assert_eq!(term0.stack[0].remaining.len(), 1);
1273 assert_eq!(term0.stack[0].remaining[0], &expr_var(m_key));
1274 });
1275 }
1276}