rustemo_compiler/table/mod.rs

1//! Calculating LR tables
2use std::{
3    cell::RefCell,
4    cmp::{self, Ordering},
5    collections::{BTreeMap, BTreeSet, VecDeque},
6    fmt::{self, Display},
7    iter::{self, repeat},
8    ops::{Index, IndexMut},
9    slice::{Iter, IterMut},
10};
11
12use clap::ValueEnum;
13use colored::Colorize;
14use itertools::{chain, Itertools};
15use rustemo::log;
16
17use crate::{
18    create_index,
19    error::{Error, Result},
20    grammar::{Associativity, Priority, Terminal, DEFAULT_PRIORITY},
21    index::{
22        NonTermIndex, NonTermVec, ProdIndex, ProdVec, StateIndex, StateVec, SymbolIndex,
23        SymbolVec, TermIndex, TermVec,
24    },
25    lang::rustemo_actions::Recognizer,
26    settings::{ParserAlgo, Settings},
27};
28
29use super::grammar::{res_symbol, Grammar};
30
31#[derive(Debug, Clone)]
32pub enum Action {
33    Shift(StateIndex),
34    Reduce(ProdIndex, usize),
35    Accept,
36}
37
38/// Specifies the type of the parsing table used during parsing to decide about
39/// shift/reduce/goto operations.
40#[allow(non_camel_case_types)]
41#[derive(Debug, Default, Clone, PartialEq, Eq, ValueEnum)]
42pub enum TableType {
43    /// Lookahead LR tables. See
44    /// <http://publications.csail.mit.edu/lcs/pubs/pdf/MIT-LCS-TR-065.pdf>
45    LALR,
46    /// A slight modification of LALR tables to prevent some reduce/reduce
47    /// conflicts. Inspired by <https://doi.org/10.1007/BF00290336>
48    #[default]
49    LALR_PAGER,
50    /// LALR tables extended with right-nulled entries. Used for GLR parsing
51    /// (RNGLR). See <https://doi.org/10.1145/1146809.1146810>
52    LALR_RN,
53}
54
55type Firsts = BTreeSet<SymbolIndex>;
56type FirstSets = SymbolVec<Firsts>;
57
58create_index!(ItemIndex, ItemVec);
59
/// An LR state is a set of LR items together with the LR automaton actions and
/// gotos for that state.
61#[derive(Clone)]
62pub struct LRState<'g> {
63    grammar: &'g Grammar,
64
65    /// The index of this state.
66    pub idx: StateIndex,
67
    /// The grammar symbol related to this state. Intuitively, the grammar
    /// symbol seen on a transition to this state. E.g. if the symbol is a
    /// terminal, the parser did a Shift operation to enter this state;
    /// otherwise it entered the state through a GOTO after a reduction.
72    pub symbol: SymbolIndex,
73
74    /// LR(1) items used to construct this state.
75    items: ItemVec<LRItem>,
76
77    /// A terminal indexed vector of LR actions. Actions instruct LR parser to
78    /// Shift from the input, Reduce the top of the LR stack or accept the
79    /// input. For the deterministic parsing the internal vector of actions can
80    /// contain only one action.
81    pub actions: TermVec<Vec<Action>>,
82
83    /// A non-terminal indexed vector of LR GOTOs. GOTOs represent transitions
84    /// to another state after successful reduction of a non-terminal.
85    pub gotos: NonTermVec<Option<StateIndex>>,
86
    /// Terminals, with finish flags, sorted by priority and by most specific
    /// match (if enabled) for lexical disambiguation. The finish flag, if true,
    /// indicates that if some terminals have already matched at the current
    /// location no further terminals should be tried.
91    pub sorted_terminals: Vec<(TermIndex, bool)>,
92
    /// Each production has a priority which we use to resolve S/R and R/R
    /// conflicts. Since the Shift operation is performed over a terminal
    /// symbol, resolving S/R requires a terminal priority. The priority given
    /// directly on a terminal is used for lexical disambiguation, so here we
    /// use the priority the terminal inherits from productions: the priority
    /// of a terminal for S/R resolution is the priority of the production the
    /// terminal is used in. Since the same terminal can be used in many
    /// productions, we take the maximum (see the sketch after this struct).
101    max_prior_for_term: BTreeMap<TermIndex, Priority>,
102}
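
// A minimal sketch (not part of the compiler) of how the maximum production
// priority per terminal is accumulated, as described for `max_prior_for_term`
// above; `group_per_next_symbol` below uses the same `entry` pattern. Plain
// `u32` values stand in here for the crate's `TermIndex` and `Priority` types.
#[test]
fn sketch_max_priority_per_terminal() {
    use std::cmp;
    use std::collections::BTreeMap;

    // (terminal, priority of a production the terminal appears in)
    let occurrences = [(0u32, 10u32), (1, 15), (0, 20), (1, 5)];

    let mut max_prior_for_term: BTreeMap<u32, u32> = BTreeMap::new();
    for &(term, prod_prio) in &occurrences {
        max_prior_for_term
            .entry(term)
            .and_modify(|v| *v = cmp::max(*v, prod_prio))
            .or_insert(prod_prio);
    }

    // Terminal 0 appears in productions with priorities 10 and 20, so 20 is
    // used for shift/reduce resolution.
    assert_eq!(max_prior_for_term[&0u32], 20);
    assert_eq!(max_prior_for_term[&1u32], 15);
}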
103
104/// Two LR states are equal if they contain the same kernel items.
105impl PartialEq for LRState<'_> {
106    fn eq(&self, other: &Self) -> bool {
107        let self_ki = self.kernel_items();
108        let other_ki = other.kernel_items();
109        self_ki.len() == other_ki.len() && self_ki.iter().zip(other_ki.iter()).all(|(x, y)| x == y)
110    }
111}
112impl Eq for LRState<'_> {}
113
114impl Display for LRState<'_> {
115    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
116        write!(
117            f,
118            "{}\n\t{}",
119            format!(
120                "State {}:{}",
121                self.idx,
122                self.grammar.symbol_name(self.symbol)
123            )
124            .green()
125            .bold(),
126            self.items
127                .iter()
128                .map(|i| i.to_string(self.grammar))
129                .collect::<Vec<_>>()
130                .join("\n\t")
131        )
132    }
133}
134
135impl std::fmt::Debug for LRState<'_> {
136    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
137        f.debug_struct("LRState")
138            .field("idx", &self.idx)
139            .field("symbol", &self.symbol)
140            .field("items", &self.items)
141            .field("actions", &self.actions)
142            .field("gotos", &self.gotos)
143            .field("sorted_terminals", &self.sorted_terminals)
144            .field("max_prior_for_term", &self.max_prior_for_term)
145            .finish()
146    }
147}
148
149impl<'g> LRState<'g> {
150    fn new(grammar: &'g Grammar, index: StateIndex, symbol: SymbolIndex) -> Self {
151        Self {
152            grammar,
153            idx: index,
154            symbol,
155            items: ItemVec::new(),
156            actions: grammar.new_termvec(vec![]),
157            gotos: grammar.new_nontermvec(None),
158            max_prior_for_term: BTreeMap::new(),
159            sorted_terminals: Vec::new(),
160        }
161    }
162
163    fn new_with_items(
164        grammar: &'g Grammar,
165        index: StateIndex,
166        symbol: SymbolIndex,
167        items: ItemVec<LRItem>,
168    ) -> Self {
169        Self {
170            grammar,
171            idx: index,
172            symbol,
173            items,
174            actions: grammar.new_termvec(vec![]),
175            gotos: grammar.new_nontermvec(None),
176            max_prior_for_term: BTreeMap::new(),
177            sorted_terminals: Vec::new(),
178        }
179    }
180
181    fn add_item(mut self, item: LRItem) -> Self {
182        self.items.push(item);
183        self
184    }
185
186    fn kernel_items(&self) -> Vec<&LRItem> {
187        self.items.iter().filter(|i| i.is_kernel()).collect()
188    }
189
190    fn nonkernel_items(&self) -> Vec<&LRItem> {
191        self.items.iter().filter(|i| !i.is_kernel()).collect()
192    }
193
    /// Closes over LR items of the LRState.
    ///
    /// Starting from the given items (usually just kernel items), for each
    /// item whose symbol right of the dot is a non-terminal, adds all items
    /// where that non-terminal is the LHS and the dot is at the beginning. In
    /// other words, adds all missing non-kernel items (see the simplified
    /// sketch after this impl block).
200    fn closure(&mut self, first_sets: &FirstSets, prod_rn_lengths: &Option<ProdVec<usize>>) {
201        loop {
202            // This is OK as Hash uses only non-interior-mutable part of the
203            // LRItem type.
204            #[allow(clippy::mutable_key_type)]
205            let mut new_items: BTreeSet<LRItem> = BTreeSet::new();
206
207            for item in &self.items {
208                if let Some(symbol) = item.symbol_at_position(self.grammar) {
209                    if self.grammar.is_nonterm(symbol) {
210                        let mut new_follow;
211                        // Find first set of substring that follow symbol at
212                        // position
213                        if item.position + 1 < self.grammar.productions[item.prod].rhs.len() {
214                            new_follow = firsts(
215                                self.grammar,
216                                first_sets,
217                                &self.grammar.production_rhs_symbols(item.prod)
218                                    [item.position + 1..],
219                            );
                            // If the symbols that follow the current nonterminal
                            // can derive EMPTY, add the follows of the current item.
222                            if new_follow.contains(&self.grammar.empty_index) {
223                                new_follow.remove(&self.grammar.empty_index);
224                                new_follow.extend(item.follow.borrow().iter());
225                            }
226                        } else {
                            // If the symbol at the dot is the last symbol in
                            // the production, inherit the follow of the
                            // current item.
229                            new_follow = Follow::new();
230                            new_follow.extend(item.follow.borrow().iter());
231                        }
232
233                        // Get all productions of the current non-terminal and
234                        // create LR items with the calculated follow.
235                        let nonterm = self.grammar.symbol_to_nonterm_index(symbol);
236                        for prod in &self.grammar.nonterminals[nonterm].productions {
237                            new_items.insert(LRItem::with_follow(
238                                self.grammar,
239                                *prod,
240                                prod_rn_lengths.as_ref().map(|p| p[*prod]),
241                                new_follow.clone(),
242                            ));
243                        }
244                    }
245                }
246            }
247
            // Add all new items to state.items. If an item is already there,
            // update its follow. If nothing changed, break out of the loop.
250            let mut change = false;
251            for new_item in new_items {
252                match self.items.iter_mut().find(|x| *x == &new_item) {
253                    Some(item) => {
254                        // Item already exists, update follows
255                        let l = item.follow.borrow().len();
256                        item.follow
257                            .borrow_mut()
258                            .extend(new_item.follow.borrow().iter());
259                        if item.follow.borrow().len() > l {
260                            change = true;
261                        }
262                    }
263                    None => {
264                        self.items.push(new_item);
265                        change = true;
266                    }
267                }
268            }
269            if !change {
270                break;
271            }
272        }
273    }
274
275    /// Group LR items per grammar symbol right of the dot, and calculate
276    /// terminal max priorities.
277    fn group_per_next_symbol(&mut self) -> BTreeMap<SymbolIndex, Vec<ItemIndex>> {
278        let mut per_next_symbol: BTreeMap<SymbolIndex, Vec<ItemIndex>> = BTreeMap::new();
279
280        for (idx, item) in self.items.iter().enumerate() {
281            let symbol = item.symbol_at_position(self.grammar);
282            if let Some(symbol) = symbol {
283                per_next_symbol.entry(symbol).or_default().push(idx.into());
284                if self.grammar.is_term(symbol) {
285                    let symbol = self.grammar.symbol_to_term_index(symbol);
286                    let prod_prio = self.grammar.productions[item.prod].prio;
287                    self.max_prior_for_term
288                        .entry(symbol)
289                        .and_modify(|v| *v = cmp::max(*v, prod_prio))
290                        .or_insert(prod_prio);
291                }
292            }
293        }
294        per_next_symbol
295    }
296}
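
// A minimal, LR(0)-only sketch (not used by the compiler) of the closure
// fixpoint described for `LRState::closure` above: starting from the kernel
// items, keep adding `(production, dot at position 0)` items for every
// non-terminal right of the dot until nothing changes. Follow sets and RN
// lengths are omitted. The toy grammar is hypothetical: S -> A a, A -> b,
// A -> (empty); uppercase letters are non-terminals.
#[test]
fn sketch_lr0_closure() {
    use std::collections::BTreeSet;

    // Productions as (LHS, RHS); the RHS is a string of single-char symbols.
    let productions: [(char, &str); 3] = [('S', "Aa"), ('A', "b"), ('A', "")];

    // An item is (production index, dot position). Kernel: S -> . A a
    let mut items: BTreeSet<(usize, usize)> = BTreeSet::from([(0, 0)]);

    loop {
        let mut new_items = BTreeSet::new();
        for &(prod, pos) in &items {
            // Symbol right of the dot, if any.
            if let Some(sym) = productions[prod].1.chars().nth(pos) {
                if sym.is_uppercase() {
                    // Add an item for every production of that non-terminal.
                    for (idx, (lhs, _)) in productions.iter().enumerate() {
                        if *lhs == sym {
                            new_items.insert((idx, 0));
                        }
                    }
                }
            }
        }
        let before = items.len();
        items.extend(new_items);
        if items.len() == before {
            break;
        }
    }

    // The closure of { S -> . A a } also contains A -> . b and A -> .
    assert_eq!(items, BTreeSet::from([(0, 0), (1, 0), (2, 0)]));
}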
297
/// Represents an item in an item set. An item is defined by a production and a
/// position inside the production (the dot). If the item is of LR(1) type, a
/// follow set is also defined. The follow set is the set of terminals that can
/// follow the symbol at the given position in the given production.
302#[derive(Debug, Eq, Clone, PartialOrd, Ord)]
303struct LRItem {
304    prod: ProdIndex,
305
306    /// The length of the production
307    prod_len: usize,
308
    /// Right-nulled production length - the position in the production after
    /// which all the remaining symbols can derive EMPTY.
    /// Used in RN-GLR to decide when the item can reduce. `None` for LR.
312    rn_len: Option<usize>,
313
314    /// The position of "the dot" in the RHS of the production
315    position: usize,
316
317    follow: RefCell<Follow>,
318}
319
320impl std::hash::Hash for LRItem {
321    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
322        self.prod.hash(state);
323        self.position.hash(state);
324    }
325}
326
327impl PartialEq for LRItem {
328    fn eq(&self, other: &Self) -> bool {
329        self.prod == other.prod && self.position == other.position
330    }
331}
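
// A minimal sketch (not part of the compiler) of the pattern used by the
// `Hash`/`PartialEq` impls above: equality considers only the "stable" part of
// an item (production and dot position) while the interior-mutable follow set
// is ignored. This is why the `clippy::mutable_key_type` allowance inside
// `LRState::closure` is sound, and it is what allows follows to be merged in
// place.
#[test]
fn sketch_eq_ignores_follow() {
    use std::cell::RefCell;
    use std::collections::BTreeSet;

    #[derive(Debug)]
    struct Item {
        prod: usize,
        position: usize,
        follow: RefCell<BTreeSet<usize>>,
    }

    impl PartialEq for Item {
        fn eq(&self, other: &Self) -> bool {
            self.prod == other.prod && self.position == other.position
        }
    }
    impl Eq for Item {}

    let a = Item {
        prod: 3,
        position: 1,
        follow: RefCell::new(BTreeSet::from([7])),
    };
    let b = Item {
        prod: 3,
        position: 1,
        follow: RefCell::new(BTreeSet::from([9])),
    };

    // The same production and position compare equal although follows differ...
    assert_eq!(a, b);
    // ...so merging can simply extend one follow set with the other.
    a.follow.borrow_mut().extend(b.follow.borrow().iter());
    assert_eq!(*a.follow.borrow(), BTreeSet::from([7, 9]));
}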
332
/// LRItem is a production with a dot in the RHS.
///
/// Intuitively, the dot signifies the position where the parsing process is in
/// a given state. The beginning position is 0, before the first symbol in the
/// production RHS. The end position is len(RHS), after the last symbol in the
/// RHS.
///
/// LRItem also keeps a set of follow terminals. The item is valid only if the
/// production is followed by a terminal from the given follow set.
///
/// # Example
///
/// ```ignore
/// // If the production with index 5 is `A: B a C;` and `grammar` is the
/// // containing grammar:
/// let item = LRItem::new(grammar, ProdIndex(5), None)
///     .inc_position()
///     .inc_position();
/// assert_eq!(item.position, 2);
/// ```
///
/// ```text
/// A: B a . C;
///        ^
///        |------ position is 2
/// ```
358impl LRItem {
359    #[cfg(test)]
360    fn new(grammar: &Grammar, prod: ProdIndex, rn_len: Option<usize>) -> Self {
361        LRItem {
362            prod,
363            prod_len: grammar.production_len(prod),
364            rn_len,
365            position: 0,
366            follow: RefCell::new(Follow::new()),
367        }
368    }
369
370    fn with_follow(
371        grammar: &Grammar,
372        prod: ProdIndex,
373        rn_len: Option<usize>,
374        follow: Follow,
375    ) -> Self {
376        LRItem {
377            prod,
378            prod_len: grammar.production_len(prod),
379            rn_len,
380            position: 0,
381            follow: RefCell::new(follow),
382        }
383    }
384
385    fn symbol_at_position(&self, grammar: &Grammar) -> Option<SymbolIndex> {
386        Some(res_symbol(
387            grammar.productions.get(self.prod)?.rhs.get(self.position)?,
388        ))
389    }
390
391    /// Moves position to the right.
392    #[inline]
393    fn inc_position(mut self) -> Self {
394        assert!(self.position < self.prod_len);
395        self.position += 1;
396        self
397    }
398
    /// True if this item belongs to the kernel core.
    ///
    /// Kernel core items are those whose position is not 0, plus the item of
    /// the augmented production, which by definition belongs to the core.
403    #[inline]
404    fn is_kernel(&self) -> bool {
405        self.position > 0 || self.prod == ProdIndex(0)
406    }
407
408    #[inline]
409    fn is_reducing(&self) -> bool {
410        self.position == self.prod_len
411            || match self.rn_len {
412                Some(rn_len) => self.position >= rn_len,
413                None => false,
414            }
415    }
416
417    fn to_string(&self, grammar: &Grammar) -> String {
418        let prod = &grammar.productions[self.prod];
419        let mut rhs = prod
420            .rhs_symbols()
421            .iter()
422            .map(|s| grammar.symbol_name(*s))
423            .collect::<Vec<_>>();
424        rhs.insert(self.position, ".".into());
425        format!(
426            "{}: {}: {}    {{{}}}",
427            prod.idx.to_string().green(),
428            grammar
429                .symbol_name(grammar.nonterm_to_symbol_index(prod.nonterminal))
430                .green(),
431            rhs.join(" "),
432            self.follow
433                .borrow()
434                .iter()
435                .map(|f| grammar.symbol_name(*f))
436                .collect::<Vec<_>>()
437                .join(", ")
438        )
439    }
440}
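
// A minimal sketch (not used by the compiler) of `LRItem::is_reducing` above:
// an item can reduce when the dot reaches the end of the production or, for
// right-nulled (RN) items used by GLR, when the dot has reached the RN length.
#[test]
fn sketch_is_reducing() {
    // (dot position, production length, optional RN length) -> can reduce?
    fn can_reduce(position: usize, prod_len: usize, rn_len: Option<usize>) -> bool {
        position == prod_len
            || match rn_len {
                Some(rn_len) => position >= rn_len,
                None => false,
            }
    }

    // Plain LR item `A: B a . C` of a production with length 3 cannot reduce yet.
    assert!(!can_reduce(2, 3, None));
    // With RN length 2 (the trailing `C` derives EMPTY) the RN item can reduce.
    assert!(can_reduce(2, 3, Some(2)));
    // With the dot at the end of the production it always can.
    assert!(can_reduce(3, 3, None));
}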
441
442pub enum ConflictKind {
443    ShiftReduce(ProdIndex),
444    ReduceReduce(ProdIndex, ProdIndex),
445}
446
447pub struct Conflict<'g, 's> {
448    state: &'s LRState<'g>,
449    follow: TermIndex,
450    kind: ConflictKind,
451}
452
453impl Display for Conflict<'_, '_> {
454    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
455        write!(f, "")?;
456        Ok(())
457    }
458}
459
460pub struct LRTable<'g, 's> {
461    pub states: StateVec<LRState<'g>>,
462    pub layout_state: Option<StateIndex>,
463    grammar: &'g Grammar,
464    settings: &'s Settings,
465    first_sets: FirstSets,
466
    /// Right-nulled lengths of productions. Used in RNGLR. The RN length is a
    /// position in a production after which all the remaining symbols can
    /// derive EMPTY.
    ///
    /// E.g. if there are 3 symbols in the RHS and only the last one can derive
    /// EMPTY, the RN length is 2. In other words, this length is the minimal
    /// reduction length for this production (see the sketch after this struct).
474    pub production_rn_lengths: Option<ProdVec<usize>>,
475}
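
// A minimal sketch (not part of the compiler) of how a right-nulled length is
// obtained, as described for `production_rn_lengths` above and implemented by
// the free function of the same name near the end of this module: walk the RHS
// backwards and subtract one for every trailing symbol that can derive EMPTY.
#[test]
fn sketch_rn_length() {
    // The RHS is given here as flags: `true` means "can derive EMPTY".
    fn rn_length(rhs_derives_empty: &[bool]) -> usize {
        let mut rn_len = rhs_derives_empty.len();
        for derives_empty in rhs_derives_empty.iter().rev() {
            if *derives_empty {
                rn_len -= 1;
            } else {
                break;
            }
        }
        rn_len
    }

    // Three symbols where only the last one can derive EMPTY -> RN length 2.
    assert_eq!(rn_length(&[false, false, true]), 2);
    // Nothing nullable at the end -> RN length equals the production length.
    assert_eq!(rn_length(&[false, true, false]), 3);
    // A fully nullable RHS can reduce empty right away -> RN length 0.
    assert_eq!(rn_length(&[true, true]), 0);
}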
476
477impl<'g, 's> LRTable<'g, 's> {
478    pub fn new(grammar: &'g Grammar, settings: &'s Settings) -> Result<Self> {
479        let first_sets = first_sets(grammar);
480        let production_rn_lengths = match settings.table_type {
481            TableType::LALR_RN => Some(production_rn_lengths(&first_sets, grammar)),
482            TableType::LALR | TableType::LALR_PAGER => None,
483        };
484        let mut table = Self {
485            grammar,
486            settings,
487            states: StateVec::new(),
488            layout_state: None,
489            first_sets,
490            production_rn_lengths,
491        };
492
493        table.check_empty_sets()?;
494
495        table.calc_states(grammar.augmented_index);
496        if let Some(augmented_layout_index) = grammar.augmented_layout_index {
497            table.layout_state = Some(StateIndex(table.states.len()));
498            table.calc_states(augmented_layout_index)
499        }
500
501        log!("LR states constructed. Updating follows.");
502        table.propagate_follows();
503
504        log!(
505            "Calculate REDUCTION entries in ACTION tables and resolve \
506            possible conflicts."
507        );
508        table.calculate_reductions();
509
510        log!("Sort terminals for lexical disambiguation");
511        table.sort_terminals();
512
513        println!("Terminals: {}", grammar.terminals.len());
514        println!("Non-terminals: {}", grammar.nonterminals().len());
515        println!("Productions: {}", grammar.productions().len());
516        println!("States: {}", table.states.len());
517
518        if settings.print_table {
519            println!("LR TABLE:");
520            println!("{}", table);
521        }
522
523        Ok(table)
524    }
525
526    /// Calculate LR states with GOTOs and ACTIONs for the given Grammar.
527    ///
528    /// This collection of states is used to generate LR/GLR parser tables.
529    fn calc_states(&mut self, start_symbol: SymbolIndex) {
530        let mut current_state_idx = self.states.len();
531
532        let prods = &self.grammar.symbol_to_nonterm(start_symbol).productions;
533        assert_eq!(prods.len(), 1);
534
535        // Create a state for the first production (augmented)
536        let state = LRState::new(self.grammar, StateIndex(current_state_idx), start_symbol)
537            .add_item(LRItem::with_follow(
538                self.grammar,
539                prods[0],
540                self.production_rn_lengths.as_ref().map(|p| p[prods[0]]),
541                Follow::from([self.grammar.stop_index]),
542            ));
543        current_state_idx += 1;
544
545        // States to be processed.
546        let mut state_queue = VecDeque::from([state]);
547
548        log!("Calculating LR automaton states.");
549        while let Some(mut state) = state_queue.pop_front() {
            // For each state calculate its closure first, i.e. starting from the
            // so-called "kernel items", expand the collection with non-kernel
            // items. We will also calculate GOTO and ACTIONS tables for each
            // state, keyed by grammar symbol.
554            state.closure(&self.first_sets, &self.production_rn_lengths);
555
            // To discover other states we examine the grammar symbols right of
            // the dot in the current state's items and group all items by that
            // symbol.
559            let per_next_symbol = state.group_per_next_symbol();
560
561            // Create accept action if possible.
562            for &symbol in per_next_symbol.keys() {
563                if symbol == self.grammar.stop_index {
564                    state.actions[self.grammar.symbol_to_term_index(symbol)] =
565                        vec![Action::Accept];
566                    break;
567                }
568            }
569
570            // Create new states reachable from the current state.
571            let new_states = Self::create_new_states(self.grammar, &state, per_next_symbol);
572
            // Find states that already exist and try to merge. If merging is not
            // possible, or no existing state is found, push the new state to the
            // state queue.
575            for mut new_state in new_states {
576                let target_state_symbol = new_state.symbol;
577                let mut new_state_found = true;
578                let mut target_state_idx = StateIndex(current_state_idx);
579                for old_state in self
580                    .states
581                    .iter_mut()
582                    .chain(state_queue.iter_mut())
583                    .chain(iter::once(&mut state))
584                    .filter(|x| **x == new_state)
585                {
586                    // If the same state already exists try to merge.
587                    if Self::merge_state(self.settings, old_state, &new_state) {
588                        new_state_found = false;
589                        target_state_idx = old_state.idx;
590                        break;
591                    }
592                }
593
594                // Create GOTO for non-terminal or Shift Action for terminal.
595                if self.grammar.is_nonterm(target_state_symbol) {
596                    state.gotos[self.grammar.symbol_to_nonterm_index(target_state_symbol)] =
597                        Some(target_state_idx);
598                } else {
599                    let term = self.grammar.symbol_to_term_index(new_state.symbol);
600                    state.actions[term].push(Action::Shift(target_state_idx));
601                }
602
603                if new_state_found {
604                    // Merge is not possible. Create new state.
605                    new_state.idx = StateIndex(current_state_idx);
606
607                    state_queue.push_back(new_state);
608                    current_state_idx += 1;
609                }
610            }
611
612            self.states.push(state);
613        }
614    }
615
    /// Try to merge new_state into old_state. Returns false if merging is not
    /// possible.
    ///
    /// Unless pure LALR tables are requested, an additional check is made and
    /// the merge is rejected if it would introduce a new R/R conflict.
621    fn merge_state(
622        settings: &Settings,
623        old_state: &mut LRState<'g>,
624        new_state: &LRState<'g>,
625    ) -> bool {
626        // States with different kernel sets cannot be merged.
627        if old_state != new_state {
628            return false;
629        }
630
631        let old_state_items = old_state
632            .items
633            .clone()
634            .into_iter()
635            .filter(|item| item.is_kernel());
636
        // Pairs of a kernel item from the old state and the corresponding item
        // from the new state.
638        let item_pairs: Vec<(&mut LRItem, &LRItem)> = iter::zip(
639            old_state.items.iter_mut().filter(|item| item.is_kernel()),
640            old_state_items.map(|x| new_state.items.iter().find(|&i| *i == x).unwrap()),
641        )
642        .collect();
643
644        if settings.table_type != TableType::LALR {
            // If this is not pure LALR, check whether merging would introduce an
            // R/R conflict. In case it would, do not merge but keep the states split.
            //
            // Menhir's weak compatibility test (a variant of Pager's weak compatibility test).
649            // See Definition 2.29 from the paper:
650            // Denny, Joel E., and Brian A. Malloy. "The IELR (1) algorithm for generating
651            // minimal LR (1) parser tables for non-LR (1) grammars with conflict
652            // resolution." Science of Computer Programming 75.11 (2010): 943-979.
653            for (old, new) in &item_pairs {
654                if !old.is_reducing() {
655                    continue;
656                }
657                for (old_in, new_in) in &item_pairs {
658                    if old == old_in {
659                        continue;
660                    }
                    // ∀t, at least one of the following is true:
                    // (a) t ∉ {K1 ∩ K2'} ∧ t ∉ {K2 ∩ K1'},
                    // (b) t ∈ {K1 ∩ K2},
                    // (c) t ∈ {K1' ∩ K2'}.
665                    for t in chain(
666                        old.follow.borrow().intersection(&*new_in.follow.borrow()),
667                        old_in.follow.borrow().intersection(&*new.follow.borrow()),
668                    ) {
669                        // Test (b) and (c) conditions
670                        if old
671                            .follow
672                            .borrow()
673                            .intersection(&*old_in.follow.borrow())
674                            .all(|x| x != t)
675                            && new
676                                .follow
677                                .borrow()
678                                .intersection(&*new_in.follow.borrow())
679                                .all(|x| x != t)
680                        {
681                            // We have found terminal which would introduce RR
682                            // conflict if states are merged.
683                            return false;
684                        }
685                    }
686                }
687            }
688        }
689
690        // Do the merge by updating old items follow sets.
691        for (old, new) in item_pairs {
692            old.follow.borrow_mut().extend(new.follow.borrow().iter())
693        }
694        true
695    }
696
    /// Propagate LR item follows.
    ///
    /// This is needed due to state merging. Whenever a merge occurs, the target
    /// state's follows might get updated, so we have to propagate those changes
    /// to other states.
702    fn propagate_follows(&mut self) {
703        let mut changed = true;
704        while changed {
705            changed = false;
706            for state in self.states.iter_mut() {
                // Refresh the closure to propagate follows from kernel items to
                // non-kernel items of the same state, as merging updates only
                // kernel items.
710                state.closure(&self.first_sets, &self.production_rn_lengths);
711            }
712
713            for state in self.states.iter() {
714                // Use GOTOs and ACTIONS to propagate follows between states.
715                state
716                    .gotos
717                    .iter()
718                    .filter_map(|x| x.as_ref())
719                    .chain(state.actions.iter().flat_map(|x| {
720                        x.iter().filter_map(|a| match a {
721                            Action::Shift(state) => Some(state),
722                            _ => None,
723                        })
724                    }))
725                    .for_each(|&target_state| {
726                        for target_item in &mut self.states[target_state]
727                            .items
728                            .iter()
729                            .filter(|x| x.is_kernel())
730                        {
731                            // Find corresponding item in state
732                            if let Some(source_item) = state.items.iter().find(|&x| {
733                                x.prod == target_item.prod
734                                    && x.position == target_item.position - 1
735                            }) {
736                                // Update follow of target item with item from state
737                                let follow_len = target_item.follow.borrow().len();
738                                target_item
739                                    .follow
740                                    .borrow_mut()
741                                    .extend(source_item.follow.borrow().iter());
742
743                                // if target item follow was changed set changed to true
744                                if target_item.follow.borrow().len() > follow_len {
745                                    changed = true
746                                }
747                            }
748                        }
749                    })
750            }
751        }
752    }
753
754    /// Calculate reductions entries in action tables and resolve possible
755    /// conflicts.
756    fn calculate_reductions(&mut self) {
757        let mut aug_symbols = vec![self.grammar.augmented_index];
758        if let Some(layout_index) = self.grammar.augmented_layout_index {
759            aug_symbols.push(layout_index);
760        }
761        for state in &mut self.states {
762            for item in state.items.iter().filter(|x| x.is_reducing()) {
763                let prod = &self.grammar.productions[item.prod];
764
765                // Accept if reducing by augmented productions for STOP lookahead
766                if aug_symbols.contains(&self.grammar.nonterm_to_symbol_index(prod.nonterminal)) {
767                    let actions = &mut state.actions[TermIndex(0)];
768                    actions.push(Action::Accept);
769                    continue;
770                }
771
772                let new_reduce = Action::Reduce(item.prod, item.position);
773                for follow_symbol in item.follow.borrow().iter() {
774                    let follow_term = self.grammar.symbol_to_term(*follow_symbol);
775                    let actions = &mut state.actions[follow_term.idx];
776                    if actions.is_empty() {
                        // No other actions are possible for this follow terminal.
                        // Just register this reduction.
779                        actions.push(new_reduce.clone());
780                    } else {
781                        // Conflict. Try to resolve.
782                        let (shifts, reduces): (Vec<_>, Vec<_>) = actions
783                            .clone()
784                            .into_iter()
785                            .partition(|x| matches!(x, Action::Shift(_) | Action::Accept));
                        // Only one SHIFT or ACCEPT might exist for a single
                        // terminal but many REDUCEs might exist.
788                        assert!(shifts.len() <= 1);
789
790                        let mut should_reduce = true;
791                        if let Some(shift) = shifts.first() {
792                            // Shift/Reduce conflict. Use assoc and priority to
793                            // resolve. For disambiguation treat ACCEPT action the
794                            // same as SHIFT.
795                            let shift_prio = match shift {
796                                Action::Accept => DEFAULT_PRIORITY,
797                                _ => state.max_prior_for_term[&follow_term.idx],
798                            };
799                            match prod.prio.cmp(&shift_prio) {
800                                Ordering::Less => {
                                    // If the priority of the existing SHIFT
                                    // action is higher, leave it in place.
803                                    should_reduce = false
804                                }
805                                Ordering::Equal => {
                                    // If priorities are the same use associativity.
                                    // Terminal associativity takes precedence over
                                    // production associativity.
809                                    match (&prod.assoc, &follow_term.assoc) {
810                                        (Associativity::Left, Associativity::None)
811                                        | (_, Associativity::Right) => {
812                                            // Override SHIFT with this REDUCE
813                                            assert!(actions.len() == 1);
814                                            actions.pop();
815                                        }
816                                        (Associativity::Right, Associativity::None)
817                                        | (_, Associativity::Left) => {
818                                            // If associativity is right leave SHIFT
819                                            // action as "stronger" and don't consider
820                                            // this reduction any more. Right
821                                            // associative reductions can't be in the
822                                            // same set of actions together with SHIFTs.
823                                            should_reduce = false;
824                                        }
825                                        (Associativity::None, Associativity::None) => {
826                                            // If priorities are the same and no
827                                            // associativity defined use preferred
828                                            // strategy.
829                                            let empty = prod.rhs.is_empty();
830                                            let prod_pse = empty
831                                                && self.settings.prefer_shifts_over_empty
832                                                && !prod.nopse;
833                                            let prod_ps = !empty
834                                                && self.settings.prefer_shifts
835                                                && !prod.nops;
836                                            should_reduce = !(prod_pse || prod_ps);
837                                        }
838                                    }
839                                }
840                                Ordering::Greater => {
                                    // This production's priority is higher =>
                                    // override the SHIFT with this REDUCE.
843                                    assert!(actions.len() == 1);
844                                    actions.pop();
845                                }
846                            }
847                        }
848
849                        if should_reduce {
850                            if reduces.is_empty() {
851                                actions.push(new_reduce.clone())
852                            } else {
853                                // REDUCE/REDUCE conflicts.
854                                // Try to resolve using priorities.
855                                let reduces_prio = reduces
856                                    .iter()
857                                    .map(|x| match x {
858                                        Action::Reduce(prod, ..) => {
859                                            self.grammar.productions[*prod].prio
860                                        }
861                                        other => panic!("This should not happen. Got {:?}", other),
862                                    })
863                                    .collect::<Vec<_>>();
864                                if reduces_prio.iter().all(|x| prod.prio < *x) {
                                    // Current production priority is less than all
                                    // other reductions. Do not add this reduction.
867                                } else if reduces_prio.iter().all(|x| prod.prio > *x) {
                                    // Current production priority is greater than all
                                    // other reductions. This reduction should
                                    // replace all others.
871                                    actions.retain(|x| !matches!(x, Action::Reduce(..)));
872                                    actions.push(new_reduce.clone())
873                                } else {
874                                    // For LR parsing non-empty reductions are
875                                    // preferred over empty...
876                                    if let ParserAlgo::LR = self.settings.parser_algo {
877                                        // ... so remove all empty reductions.
878                                        actions.retain(
879                                            |x| !matches!(x, Action::Reduce(_, len) if *len == 0),
880                                        );
881
882                                        if item.prod_len > 0 || actions.is_empty() {
883                                            // If current reduction is non-empty add it.
884                                            actions.push(new_reduce.clone())
885                                        }
886                                    } else {
887                                        // This R/R conflict can't be resolved.
888                                        // Just add the new reduction and GLR
889                                        // will handle it by investigating all
890                                        // possibilities.
891                                        actions.push(new_reduce.clone())
892                                    }
893                                }
894                            }
895                        }
896                    }
897                }
898            }
899        }
900    }
901
    /// Sort terminals for each state according to explicit priority and
    /// terminal recognizer type. If the most-specific-match strategy is
    /// enabled, string recognizers take precedence over regex recognizers, and
    /// longer string recognizers take precedence over shorter ones.
906    fn sort_terminals(&mut self) {
907        for state in &mut self.states {
908            let mut terminals = state
909                .actions
910                .iter()
911                .enumerate()
912                .filter(|(_, actions)| !actions.is_empty())
913                .map(|(idx, _)| self.grammar.term_by_index(TermIndex(idx)))
914                .collect::<Vec<_>>();
915
916            let term_prio = |term: &Terminal| -> u32 {
917                term.prio * 1000
918                    + if self.settings.lexical_disamb_most_specific {
919                        match &term.recognizer {
920                            Some(recognizer) => {
921                                (match recognizer {
922                                    Recognizer::StrConst(str_rec) => str_rec.as_ref().len(),
923                                    Recognizer::RegexTerm(_) => 0,
924                                }) as u32
925                            }
926                            None => 0,
927                        }
928                    } else {
929                        0
930                    }
931            };
932            terminals.sort_by(|&l, &r| {
933                let l_term_prio = term_prio(l);
934                let r_term_prio = term_prio(r);
935                r_term_prio.cmp(&l_term_prio)
936            });
937
938            // Calculate "finish" flags
939            let mut sorted_terminals: Vec<(TermIndex, bool)> = vec![];
940            let mut last_prio = None;
941            for terminal in terminals {
942                let finish = self.settings.lexical_disamb_most_specific
943                    && matches!(terminal.recognizer, Some(Recognizer::StrConst(_)));
944                let last_finish = last_prio.is_some_and(|prio| terminal.prio != prio);
945                last_prio = Some(terminal.prio);
946                if let Some(t) = sorted_terminals.last_mut() {
947                    t.1 |= last_finish
948                }
949                sorted_terminals.push((terminal.idx, finish));
950            }
951
952            state.sorted_terminals = sorted_terminals;
953        }
954    }
955
956    /// Create new states that can be reached from the given state.
957    fn create_new_states(
958        grammar: &'g Grammar,
959        state: &LRState,
960        per_next_symbol: BTreeMap<SymbolIndex, Vec<ItemIndex>>,
961    ) -> Vec<LRState<'g>> {
962        let mut states = Vec::new();
963        for (symbol, items) in per_next_symbol {
964            let next_state_items = items
965                .into_iter()
966                .map(|i| state.items[i].clone().inc_position())
967                .collect();
968            states.push(LRState::new_with_items(
969                grammar,
970                StateIndex(0), // Temporary value. The caller will set the real index.
971                symbol,
972                next_state_items,
973            ));
974        }
975        states
976    }
977
    /// Check that no grammar symbol has an empty FIRST set.
    ///
    /// An empty FIRST set signals infinite recursion on the grammar symbol, so
    /// table construction is aborted with an error.
981    fn check_empty_sets(&self) -> Result<()> {
982        if let Some((idx, _)) = self
983            .first_sets
984            .iter()
985            .enumerate()
986            .find(|(_, s)| s.is_empty())
987        {
988            return Err(Error::Error(format!(
989                "First set empty for grammar symbol {:?}.\n\
                 This indicates an infinite recursion on the grammar symbol.",
991                &self.grammar.symbol_name(SymbolIndex(idx))
992            )));
993        }
994        Ok(())
995    }
996
997    pub fn get_conflicts(&'s self) -> Vec<Conflict<'g, 's>> {
998        self.states
999            .iter()
1000            .flat_map(|state| {
1001                state
1002                    .actions
1003                    .iter()
1004                    .enumerate()
1005                    .filter(|(_, actions)| actions.len() > 1)
1006                    .flat_map(|(idx, actions)| {
1007                        actions
1008                            .iter()
1009                            .combinations(2)
1010                            .map(move |conflict| (idx, conflict))
1011                    })
1012                    .map(|(term_index, conflict)| {
1013                        let kind = match &conflict[..] {
1014                            [Action::Shift(_), Action::Reduce(prod, _)]
1015                            | [Action::Reduce(prod, _), Action::Shift(_)] => {
1016                                ConflictKind::ShiftReduce(*prod)
1017                            }
1018                            [Action::Reduce(prod1, _), Action::Reduce(prod2, _)] => {
1019                                ConflictKind::ReduceReduce(*prod1, *prod2)
1020                            }
1021                            e => {
1022                                // This cannot happen as we have combinations of size 2.
1023                                print!("{e:?}");
1024                                unreachable!()
1025                            }
1026                        };
1027                        Conflict {
1028                            state,
1029                            follow: TermIndex(term_index),
1030                            kind,
1031                        }
1032                    })
1033            })
1034            .collect()
1035    }
1036
1037    pub fn print_conflicts_report(&self, conflicts: &Vec<Conflict<'g, 's>>) {
1038        for conflict in conflicts {
1039            println!("{} {}", "In".green().bold(), conflict.state);
1040            print!(
1041                "When I saw {} and see token {} ahead I can't decide",
1042                self.grammar.symbol_name(conflict.state.symbol).green(),
1043                self.grammar
1044                    .symbol_name(self.grammar.term_to_symbol_index(conflict.follow))
1045                    .green()
1046            );
1047            match conflict.kind {
1048                ConflictKind::ShiftReduce(prod) => {
1049                    println!(
1050                        " should I shift or reduce by production:\n{}\n",
1051                        self.grammar.productions[prod]
1052                            .to_string(self.grammar)
1053                            .green()
1054                    );
1055                }
1056                ConflictKind::ReduceReduce(prod1, prod2) => {
1057                    println!(
1058                        " should I reduce by production:\n{}\nor production:\n{}\n",
1059                        self.grammar.productions[prod1]
1060                            .to_string(self.grammar)
1061                            .green(),
1062                        self.grammar.productions[prod2]
1063                            .to_string(self.grammar)
1064                            .green()
1065                    );
1066                }
1067            }
1068        }
1069        let shift_reduce_len = conflicts
1070            .iter()
1071            .filter(|c| matches!(c.kind, ConflictKind::ShiftReduce(..)))
1072            .count();
1073        let reduce_reduce_len = conflicts
1074            .iter()
1075            .filter(|c| matches!(c.kind, ConflictKind::ReduceReduce(..)))
1076            .count();
1077        println!(
1078            "{}",
1079            format!(
1080                "{} conflict(s). {} Shift/Reduce and {} Reduce/Reduce.",
1081                shift_reduce_len + reduce_reduce_len,
1082                shift_reduce_len,
1083                reduce_reduce_len
1084            )
1085            .green()
1086        );
1087    }
1088
    /// Maximal number of actions per state/token. For LR this can't be >1.
1090    #[inline]
1091    pub fn max_actions(&self) -> usize {
1092        self.states
1093            .iter()
1094            .map(|state| state.actions.iter().map(|a| a.len()).max().unwrap_or(0))
1095            .max()
1096            .unwrap()
1097    }
1098
1099    #[inline]
1100    pub fn max_recognizers(&self) -> usize {
1101        self.states
1102            .iter()
1103            .map(|state| state.actions.iter().filter(|a| !a.is_empty()).count())
1104            .max()
1105            .unwrap()
1106    }
1107
1108    pub fn to_dot(&self) -> String {
1109        let mut dot = String::from(
1110            r#"
1111            digraph grammar {
1112            rankdir=LR
1113            fontname = "Bitstream Vera Sans"
1114            fontsize = 8
1115            node[
1116                shape=record,
1117                style=filled,
1118                fillcolor=aliceblue
1119            ]
1120            nodesep = 0.3
1121            edge[dir=black,arrowtail=empty]
1122
1123        "#,
1124        );
1125
1126        let dot_escape = |s: &String| {
1127            s.replace('\n', r"\n")
1128                .replace('\\', "\\\\")
1129                .replace('"', r#"\""#)
1130                .replace('|', r"\|")
1131                .replace('{', r"\{")
1132                .replace('}', r"\}")
1133                .replace('>', r"\>")
1134                .replace('<', r"\<")
1135                .replace('?', r"\?")
1136        };
1137
1138        colored::control::set_override(false);
1139
1140        let conflicts = self.get_conflicts();
1141        let is_conflict_state = |state: &LRState| conflicts.iter().any(|s| s.state == state);
1142
1143        for state in &self.states {
1144            let mut kernel_items_str = String::new();
1145            for item in state.kernel_items() {
1146                kernel_items_str += &format!("{}\\l", dot_escape(&item.to_string(self.grammar)));
1147            }
1148
1149            let nonkernel_items = state.nonkernel_items();
1150            let mut nonkernel_items_str = if !nonkernel_items.is_empty() {
1151                String::from("|")
1152            } else {
1153                String::new()
1154            };
1155
1156            for item in nonkernel_items {
1157                nonkernel_items_str +=
1158                    &format!("{}\\l", dot_escape(&item.to_string(self.grammar)));
1159            }
1160
1161            let mut reductions: Vec<String> = vec![];
1162            for term in &self.grammar.terminals {
1163                let mut term_reduction_prods: Vec<String> = vec![];
1164                for action in &state.actions[term.idx] {
1165                    match action {
1166                        Action::Shift(target_state_idx) => {
1167                            dot += &format!(
1168                                "{} -> {} [label=\"SHIFT:{}\"]\n",
1169                                state.idx, target_state_idx, term.name
1170                            )
1171                        }
1172                        Action::Reduce(prod_idx, len) => {
1173                            term_reduction_prods.push(format!("({},{})", prod_idx, len));
1174                        }
1175                        Action::Accept => {
1176                            dot += &format!("{} -> ACCEPT [label=\"{}\"]\n", state.idx, term.name)
1177                        }
1178                    }
1179                }
1180                if !term_reduction_prods.is_empty() {
1181                    let r = term_reduction_prods.join(", ");
1182                    reductions.push(if term_reduction_prods.len() > 1 {
1183                        format!("{}:[{}]", dot_escape(&term.name), r)
1184                    } else {
1185                        format!("{}:{}", dot_escape(&term.name), r)
1186                    });
1187                }
1188            }
1189            let reductions = if !reductions.is_empty() {
1190                format!("|Reductions:\\l{}", reductions.join(", "))
1191            } else {
1192                String::new()
1193            };
1194
1195            dot += &format!(
1196                "{} [label=\"{}|{}{}{}\"{}]\n",
1197                state.idx,
1198                dot_escape(&format!(
1199                    "{}:{}",
1200                    state.idx,
1201                    self.grammar.symbol_name(state.symbol)
1202                )),
1203                kernel_items_str,
1204                nonkernel_items_str,
1205                reductions,
1206                if is_conflict_state(state) {
1207                    ", fillcolor=\"lightpink\""
1208                } else {
1209                    ""
1210                }
1211            );
1212
1213            // GOTOs
1214            for nonterm in &self.grammar.nonterminals {
1215                if let Some(target_state_idx) = state.gotos[nonterm.idx] {
1216                    dot += &format!(
1217                        "{} -> {} [label=\"GOTO:{}\"]\n",
1218                        state.idx, target_state_idx, nonterm.name
1219                    )
1220                }
1221            }
1222        }
1223        colored::control::unset_override();
1224
1225        dot += "\n}\n";
1226        dot
1227    }
1228}
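
// A minimal sketch (not used by the compiler) of the shift/reduce resolution
// policy applied in `calculate_reductions` above: production priority is
// compared to the maximal production priority of the shifted terminal first;
// only on a tie is associativity consulted, mirroring the match on
// `(prod.assoc, follow_term.assoc)` in that method. The local `Assoc` and
// `Resolution` enums are stand-ins for illustration only, and the
// `prefer_shifts*` settings handled there are left out of this sketch.
#[test]
fn sketch_shift_reduce_resolution() {
    use std::cmp::Ordering;

    enum Assoc {
        None,
        Left,
        Right,
    }
    #[derive(Debug, PartialEq)]
    enum Resolution {
        Shift,
        Reduce,
        Conflict,
    }

    fn resolve(prod_prio: u32, shift_prio: u32, prod_assoc: Assoc, term_assoc: Assoc) -> Resolution {
        match prod_prio.cmp(&shift_prio) {
            Ordering::Less => Resolution::Shift,
            Ordering::Greater => Resolution::Reduce,
            Ordering::Equal => match (prod_assoc, term_assoc) {
                (Assoc::Left, Assoc::None) | (_, Assoc::Right) => Resolution::Reduce,
                (Assoc::Right, Assoc::None) | (_, Assoc::Left) => Resolution::Shift,
                (Assoc::None, Assoc::None) => Resolution::Conflict,
            },
        }
    }

    // A higher/lower production priority decides regardless of associativity.
    assert_eq!(resolve(20, 10, Assoc::None, Assoc::None), Resolution::Reduce);
    assert_eq!(resolve(5, 10, Assoc::None, Assoc::None), Resolution::Shift);
    // On equal priorities the associativity decides.
    assert_eq!(resolve(10, 10, Assoc::Left, Assoc::None), Resolution::Reduce);
    assert_eq!(resolve(10, 10, Assoc::Right, Assoc::None), Resolution::Shift);
    // With no associativity and no preference settings both actions are kept,
    // which is a conflict for LR (GLR explores both).
    assert_eq!(resolve(10, 10, Assoc::None, Assoc::None), Resolution::Conflict);
}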
1229
1230fn production_rn_lengths(
1231    first_sets: &SymbolVec<BTreeSet<SymbolIndex>>,
1232    grammar: &Grammar,
1233) -> ProdVec<usize> {
1234    let mut prod_rn_lens = ProdVec::new();
1235    for production in &grammar.productions {
1236        let mut rn_len = production.rhs.len();
1237        for symbol in production.rhs_symbols().iter().rev() {
1238            if first_sets[*symbol].contains(&grammar.empty_index) {
1239                rn_len -= 1;
1240            } else {
1241                break;
1242            }
1243        }
1244        prod_rn_lens.push(rn_len)
1245    }
1246    prod_rn_lens
1247}
1248
1249impl Display for LRTable<'_, '_> {
1250    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1251        for state in &self.states {
1252            writeln!(
1253                f,
1254                "\n{}",
1255                format!(
1256                    "State {}:{}",
1257                    state.idx,
1258                    self.grammar.symbol_name(state.symbol)
1259                )
1260                .green(),
1261            )?;
1262            for item in &state.items {
1263                writeln!(f, "\t{}", item.to_string(self.grammar))?;
1264            }
1265            let actions = state
1266                .actions
1267                .iter()
1268                .enumerate()
1269                .filter(|(_, a)| !a.is_empty())
1270                .flat_map(|(i, a)| repeat(i).zip(a.iter()))
1271                .map(|(i, a)| {
1272                    (
1273                        self.grammar.terminals[TermIndex(i)].name.clone(),
1274                        match a {
1275                            Action::Shift(s) => format!("Shift to {s}"),
1276                            Action::Reduce(p, l) => {
1277                                format!(
1278                                    "Reduce for len {l} by:   {}",
1279                                    self.grammar.productions[*p].to_string(self.grammar)
1280                                )
1281                            }
1282                            Action::Accept => "Accept".into(),
1283                        },
1284                    )
1285                })
1286                .collect::<Vec<_>>();
1287
1288            if !actions.is_empty() {
1289                writeln!(f, "{}", "\tACTIONS:".green())?;
1290                for (term, action) in actions {
1291                    writeln!(f, "\t\t{term} {} {action}", "=>".green())?;
1292                }
1293            }
1294
1295            // GOTOs
1296            let gotos = state
1297                .gotos
1298                .iter()
1299                .enumerate()
1300                .filter(|(_, g)| g.is_some())
1301                .map(|(idx, g)| {
1302                    let g = g.unwrap();
1303                    (
1304                        self.grammar.nonterminals[NonTermIndex(idx)].name.clone(),
1305                        format!(
1306                            "State {}:{}",
1307                            self.grammar.symbol_name(self.states[g].symbol),
1308                            g.0
1309                        ),
1310                    )
1311                })
1312                .collect::<Vec<_>>();
1313
1314            if !gotos.is_empty() {
1315                writeln!(f, "{}", "\tGOTOs:".green())?;
1316                for (nonterm, goto) in gotos {
1317                    writeln!(f, "\t\t{nonterm} {} {goto}", "=>".green())?;
1318                }
1319            }
1320        }
1321        Ok(())
1322    }
1323}
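
// A minimal sketch (not used by the compiler) of the weak-compatibility check
// performed in `LRTable::merge_state` above for a pair of reducing items: the
// merge is rejected if some lookahead appears in a "crossed" intersection of
// old and new follow sets (K1 ∩ K2' or K2 ∩ K1') without already being a
// potential conflict in one of the states on its own (K1 ∩ K2 or K1' ∩ K2').
// Lookaheads are plain numbers here instead of terminal indices.
#[test]
fn sketch_weak_compatibility() {
    use std::collections::BTreeSet;

    fn set(items: &[u32]) -> BTreeSet<u32> {
        items.iter().copied().collect()
    }

    // True if this pair of follow sets can be merged without creating a brand
    // new reduce/reduce conflict.
    fn compatible(
        k1: &BTreeSet<u32>,
        k2: &BTreeSet<u32>,
        k1n: &BTreeSet<u32>,
        k2n: &BTreeSet<u32>,
    ) -> bool {
        k1.intersection(k2n)
            .chain(k2.intersection(k1n))
            .all(|t| k1.intersection(k2).any(|x| x == t) || k1n.intersection(k2n).any(|x| x == t))
    }

    // Merging {1}/{2} with {2}/{1} would create R/R conflicts on both
    // lookaheads that neither state has on its own, so it is rejected.
    assert!(!compatible(&set(&[1]), &set(&[2]), &set(&[2]), &set(&[1])));
    // The potential conflict on 1 already exists in the old state, so merging
    // does not make things worse.
    assert!(compatible(&set(&[1]), &set(&[1]), &set(&[1]), &set(&[3])));
}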
1324
/// Calculates, for every grammar symbol, the set of terminals that can start a
/// sentence derived from that symbol.
1327///
1328/// The Dragon book p. 221.
1329fn first_sets(grammar: &Grammar) -> FirstSets {
1330    let mut first_sets = SymbolVec::new();
1331
1332    // First set for each terminal contains only the terminal itself.
1333    for terminal in &grammar.terminals {
1334        let mut new_set = Firsts::new();
1335        new_set.insert(terminal.idx.symbol_index());
1336        first_sets.push(new_set);
1337    }
1338
1339    // Initialize empty sets for nonterminals
1340    grammar
1341        .nonterminals
1342        .iter()
1343        .for_each(|_| first_sets.push(Firsts::new()));
1344
1345    // EMPTY derives EMPTY
1346    first_sets[grammar.empty_index].insert(grammar.empty_index);
1347
1348    let mut additions = true;
1349    while additions {
1350        additions = false;
1351        for production in &grammar.productions {
1352            let lhs_nonterm = grammar.nonterm_to_symbol_index(production.nonterminal);
1353
1354            let rhs_firsts = firsts(grammar, &first_sets, &production.rhs_symbols());
1355
1356            let lhs_len = first_sets[lhs_nonterm].len();
1357
1358            first_sets[lhs_nonterm].extend(rhs_firsts);
1359
1360            // Check if any addition is actually performed.
1361            if lhs_len < first_sets[lhs_nonterm].len() {
1362                additions = true
1363            }
1364        }
1365    }
1366    first_sets
1367}
1368
1369/// For the given sequence of symbols, finds the set of FIRST terminals.
1370///
1371/// Finds all terminals which can start the given sequence of symbols. Note that
1372/// if all symbols in the sequence can derive EMPTY, EMPTY will be a part of the
1373/// returned set.
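///
/// For example, with the FIRST sets of the grammar from `test_grammar` below,
/// the sequence [T, Ep] yields { LParen, id } (T cannot derive EMPTY, so Ep is
/// never reached), while [Ep, Tp] yields { Plus, Mul, EMPTY } since every
/// symbol of the sequence can derive EMPTY.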
1374fn firsts(grammar: &Grammar, first_sets: &FirstSets, symbols: &[SymbolIndex]) -> Firsts {
1375    let mut firsts = Firsts::new();
1376    let mut break_out = false;
1377    for &symbol in symbols {
1378        let symbol_firsts = &first_sets[symbol];
1379        let mut empty = false;
1380
1381        for first in symbol_firsts {
1382            if *first == grammar.empty_index {
1383                empty = true;
1384            } else {
1385                firsts.insert(*first);
1386            }
1387        }
1388
1389        // We should proceed to the next symbol in sequence only if the current
1390        // symbol can produce EMPTY.
1391        if !empty {
1392            break_out = true;
1393            break;
1394        }
1395    }
1396    if !break_out {
1397        // If we reached the end of the symbol sequence and each symbol along the
1398        // way could derive EMPTY, then we must add EMPTY to the firsts.
1399        firsts.insert(grammar.empty_index);
1400    }
1401    firsts
1402}
1403
1404/// Calculates the sets of terminals that can follow some non-terminal for the
1405/// given grammar.
1406///
1407/// The Dragon book p. 221.
1408/// Currently only used in tests.
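///
/// For the grammar from `test_grammar` the expected FOLLOW sets (see
/// `test_follow_sets` below) are:
///
/// FOLLOW(E) = FOLLOW(Ep) = { RParen, STOP }
/// FOLLOW(T) = FOLLOW(Tp) = { Plus, RParen, STOP }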
1409type Follow = BTreeSet<SymbolIndex>;
1410#[allow(dead_code)]
1411type FollowSets = SymbolVec<Follow>;
1412#[cfg(test)]
1413fn follow_sets(grammar: &Grammar, first_sets: &FirstSets) -> FollowSets {
1414    let mut follow_sets = FollowSets::new();
1415    for _ in 0..first_sets.len() {
1416        follow_sets.push(Follow::new());
1417    }
1418
1419    // Rule 1: Place $ in FOLLOW(S), where S is the start symbol, and $ is
1420    // the input right endmarker.
1421    follow_sets[grammar.augmented_index].insert(grammar.stop_index);
1422
1423    let mut additions = true;
1424    while additions {
1425        additions = false;
1426        for production in &grammar.productions {
1427            let lhs_symbol = grammar.nonterm_to_symbol_index(production.nonterminal);
1428
1429            // Rule 2: If there is a production A -> α B β then everything in
1430            // FIRST(β) except EMPTY is in FOLLOW(B).
1431            for idx in 0..production.rhs.len() {
1432                let rhs_symbol = production.rhs_symbol(idx);
1433                let elements = follow_sets[rhs_symbol].len();
1434                let mut break_out = false;
1435                for rnext in &production.rhs[idx + 1..] {
1436                    let follow_symbols = &first_sets[res_symbol(rnext)];
1437
1438                    follow_sets[rhs_symbol]
1439                        .extend(follow_symbols.iter().filter(|&&s| s != grammar.empty_index));
1440
1441                    if !follow_symbols.contains(&grammar.empty_index) {
1442                        break_out = true;
1443                        break;
1444                    }
1445                }
1446
1447                if !break_out {
1448                    // Rule 3: If all symbols to the right of the current RHS
1449                    // symbol can derive EMPTY then the FOLLOW of this RHS symbol
1450                    // must contain everything that follows the LHS symbol.
1451                    let lhs_follows: Follow = follow_sets[lhs_symbol].iter().copied().collect();
1452                    follow_sets[rhs_symbol].extend(lhs_follows.iter());
1453                }
1454
1455                if follow_sets[rhs_symbol].len() > elements {
1456                    additions = true
1457                }
1458            }
1459        }
1460    }
1461    follow_sets
1462}
1463
1464#[cfg(test)]
1465mod tests {
1466
1467    use std::cell::RefCell;
1468    use std::collections::BTreeSet;
1469
1470    use crate::index::{ProdIndex, StateIndex, SymbolIndex};
1471    use crate::table::{first_sets, follow_sets, ItemIndex, LRTable, TableType};
1472    use crate::{
1473        grammar::Grammar,
1474        output_cmp,
1475        settings::Settings,
1476        table::{Follow, LRItem},
1477    };
1478
1479    use super::{production_rn_lengths, LRState};
1480
1481    fn follow<T, I>(indexes: T) -> BTreeSet<SymbolIndex>
1482    where
1483        T: IntoIterator<Item = I>,
1484        I: Into<SymbolIndex>,
1485    {
1486        indexes.into_iter().map(|i| i.into()).collect()
1487    }
1488
1489    fn test_grammar() -> Grammar {
1490        r#"
1491        E: T Ep;
1492        Ep: "+" T Ep | EMPTY;
1493        T: F Tp;
1494        Tp: "*" F Tp | EMPTY;
1495        F: "(" E ")" | "id";
1496
1497        terminals
1498        Plus: "+";
1499        Mul: "*";
1500        LParen: "(";
1501        RParen: ")";
1502        id: "id";
1503        "#
1504        .parse()
1505        .unwrap()
1506    }
1507
1508    fn test_grammar_2() -> Grammar {
1509        r#"
1510        E: E "+" T | T;
1511        T: T "*" F | F;
1512        F: "(" E ")" | "id";
1513
1514        terminals
1515        Plus: "+";
1516        Mul: "*";
1517        LParen: "(";
1518        RParen: ")";
1519        id: "id";
1520        "#
1521        .parse()
1522        .unwrap()
1523    }
1524
1525    /// Grammar from the Dragon book, p. 278.
1526    /// This grammar is LR(1) but not LALR.
1527    /// See also: <https://www.gnu.org/software/bison/manual/bison.html#Mysterious-Conflicts>
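    /// With canonical LR(1) construction, the state reached on "d" directly
    /// from the start has follow {a_t} for the reduction of "A: d" and {c_t}
    /// for "B: d", while the state reached after "b d" has the follows
    /// swapped, so neither state has a conflict. LALR merges these two
    /// states, giving both reductions the follow set {a_t, c_t} and thus R/R
    /// conflicts (see `test_lr_states_for_non_lalr_grammar` below).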
1528    fn test_non_lalr_grammar() -> Grammar {
1529        r#"
1530        S: A "a" | "b" A "c" | B "c" | "b" B "a";
1531        A: "d";
1532        B: "d";
1533        terminals
1534        a_t: "a";
1535        b_t: "b";
1536        c_t: "c";
1537        d_t: "d";
1538        "#
1539        .parse()
1540        .unwrap()
1541    }
1542
1543    fn test_ambiguous_grammar() -> Grammar {
1544        r#"
1545        E: E "+" E {1, left}
1546            | E "*" E {2, left}
1547            | E "^" E {3, right}
1548            | "(" E ")"
1549            | "id";
1550
1551        terminals
1552        Plus: "+";
1553        Mul: "*";
1554        Power: "^";
1555        LParen: "(";
1556        RParen: ")";
1557        id: "id";
1558        "#
1559        .parse()
1560        .unwrap()
1561    }
1562
1563    #[test]
1564    fn test_first_sets() {
1565        let grammar = test_grammar();
1566        let first_sets = first_sets(&grammar);
1567
1568        assert_eq!(first_sets.len(), 13);
1569
1570        // First of terminal is just a terminal itself.
1571        assert_eq!(
1572            &first_sets[grammar.symbol_index("id")],
1573            &follow(grammar.symbol_indexes(&["id"]))
1574        );
1575
1576        assert_eq!(
1577            &first_sets[grammar.symbol_index("F")],
1578            &follow(grammar.symbol_indexes(&["LParen", "id"]))
1579        );
1580        assert_eq!(
1581            &first_sets[grammar.symbol_index("T")],
1582            &follow(grammar.symbol_indexes(&["LParen", "id"]))
1583        );
1584        assert_eq!(
1585            &first_sets[grammar.symbol_index("E")],
1586            &follow(grammar.symbol_indexes(&["LParen", "id"]))
1587        );
1588        assert_eq!(
1589            &first_sets[grammar.symbol_index("Ep")],
1590            &follow(grammar.symbol_indexes(&["Plus", "EMPTY"]))
1591        );
1592        assert_eq!(
1593            &first_sets[grammar.symbol_index("Tp")],
1594            &follow(grammar.symbol_indexes(&["Mul", "EMPTY"]))
1595        );
1596    }
1597
1598    #[test]
1599    fn test_follow_sets() {
1600        let grammar = test_grammar();
1601        let follow_sets = follow_sets(&grammar, &first_sets(&grammar));
1602
1603        assert_eq!(
1604            &follow_sets[grammar.symbol_index("E")],
1605            &follow(grammar.symbol_indexes(&["RParen", "STOP"]))
1606        );
1607        assert_eq!(
1608            &follow_sets[grammar.symbol_index("Ep")],
1609            &follow(grammar.symbol_indexes(&["RParen", "STOP"]))
1610        );
1611        assert_eq!(
1612            &follow_sets[grammar.symbol_index("T")],
1613            &follow(grammar.symbol_indexes(&["Plus", "RParen", "STOP"]))
1614        );
1615        assert_eq!(
1616            &follow_sets[grammar.symbol_index("Tp")],
1617            &follow(grammar.symbol_indexes(&["Plus", "RParen", "STOP"]))
1618        );
1619    }
1620
1621    #[test]
1622    fn test_production_rn_lengths() {
1623        let grammar = test_grammar();
1624        let first_sets = first_sets(&grammar);
1625
1626        let production_rn_lengths = production_rn_lengths(&first_sets, &grammar);
1627
1628        // RN length of "E: T Ep" is 1 as "Ep" can derive EMPTY but "T" can't.
1629        assert_eq!(production_rn_lengths[ProdIndex(1)], 1);
1630
1631        // RN length of "Ep: "+" T Ep" is 2 as only the trailing "Ep" can derive EMPTY.
1632        assert_eq!(production_rn_lengths[ProdIndex(2)], 2);
1633    }
1634
1635    #[test]
1636    fn test_symbol_at_position() {
1637        let grammar = test_grammar();
1638
1639        let prod = ProdIndex(1);
1640        let mut item = LRItem::new(&grammar, prod, None);
1641        assert_eq!(
1642            &grammar.symbol_names(grammar.productions[prod].rhs_symbols()),
1643            &["T", "Ep"]
1644        );
1645        assert_eq!(
1646            item.symbol_at_position(&grammar).unwrap(),
1647            grammar.symbol_index("T")
1648        );
1649        item.position = 1;
1650        assert_eq!(
1651            &grammar.symbol_name(item.symbol_at_position(&grammar).unwrap()),
1652            "Ep"
1653        );
1654        item.position = 2;
1655        assert!(item.symbol_at_position(&grammar).is_none());
1656        item.position = 3;
1657        assert!(item.symbol_at_position(&grammar).is_none());
1658    }
1659
1660    #[test]
1661    fn test_group_per_next_symbol() {
1662        let grammar = test_ambiguous_grammar();
1663
1664        // Create some LR state
1665        let mut lr_state = LRState::new(&grammar, 0.into(), grammar.symbol_index("E"))
1666            .add_item(LRItem {
1667                prod: 1.into(),
1668                prod_len: grammar.production_len(1.into()),
1669                rn_len: None,
1670                position: 1,
1671                follow: RefCell::new(Follow::new()),
1672            })
1673            .add_item(LRItem {
1674                prod: 2.into(),
1675                prod_len: grammar.production_len(2.into()),
1676                rn_len: None,
1677                position: 1,
1678                follow: RefCell::new(Follow::new()),
1679            })
1680            .add_item(LRItem {
1681                prod: 3.into(),
1682                prod_len: grammar.production_len(2.into()),
1683                rn_len: None,
1684                position: 1,
1685                follow: RefCell::new(Follow::new()),
1686            })
1687            .add_item(LRItem {
1688                prod: 4.into(),
1689                prod_len: grammar.production_len(3.into()),
1690                rn_len: None,
1691                position: 2,
1692                follow: RefCell::new(Follow::new()),
1693            });
1694
1695        let per_next_symbol = lr_state.group_per_next_symbol();
1696
1697        // log!("Symbols: {:#?}", grammar.symbol_names(per_next_symbol.keys()));
1698        // Symbols: ["+", "*", "^", ")"]
1699        // log!("Pernext: {:?}", per_next_symbol);
1700        // Pernext: {SymbolIndex(1): [ItemIndex(0)], SymbolIndex(2): [ItemIndex(1)], SymbolIndex(3): [ItemIndex(2)], SymbolIndex(5): [ItemIndex(3)]}
1701
1702        // Check items grouping per symbol
1703        assert_eq!(per_next_symbol.len(), 4);
1704        assert_eq!(
1705            per_next_symbol.keys().cloned().collect::<Vec<_>>(),
1706            [1, 2, 3, 5]
1707                .iter()
1708                .map(|v| SymbolIndex(*v))
1709                .collect::<Vec<_>>()
1710        );
1711        assert_eq!(
1712            per_next_symbol.values().cloned().collect::<Vec<_>>(),
1713            vec![
1714                vec![0.into()],
1715                vec![1.into()],
1716                vec![2.into()],
1717                vec![3.into()]
1718            ]
1719        );
1720
1721        // Check production-based terminal priorities
1722        assert_eq!(
1723            lr_state.max_prior_for_term
1724                [&grammar.symbol_to_term_index(grammar.term_by_name["Power"])],
1725            3
1726        );
1727        assert_eq!(
1728            lr_state.max_prior_for_term
1729                [&grammar.symbol_to_term_index(grammar.term_by_name["Mul"])],
1730            2
1731        );
1732        assert_eq!(
1733            lr_state.max_prior_for_term
1734                [&grammar.symbol_to_term_index(grammar.term_by_name["Plus"])],
1735            1
1736        );
1737    }
1738
1739    #[test]
1740    fn test_merge_states() {
1741        let grammar = test_grammar();
1742        let lr_item_1 = LRItem {
1743            prod: ProdIndex(1),
1744            prod_len: 2,
1745            rn_len: None,
1746            position: 2,
1747            follow: RefCell::new(Follow::new()),
1748        };
1749        let lr_item_2 = LRItem {
1750            prod: ProdIndex(2),
1751            prod_len: 3,
1752            rn_len: None,
1753            position: 3,
1754            follow: RefCell::new(Follow::new()),
1755        };
1756        let old_state = LRState::new(&grammar, 0.into(), 0.into())
1757            .add_item(LRItem {
1758                follow: RefCell::new(follow([1, 3])),
1759                ..lr_item_1
1760            })
1761            .add_item(LRItem {
1762                follow: RefCell::new(follow([2])),
1763                ..lr_item_2
1764            });
1765
1766        // This should be merged as no new R/R conflicts are introduced
1767        let new_state_1 = LRState::new(&grammar, 0.into(), 0.into())
1768            .add_item(LRItem {
1769                follow: RefCell::new(follow([1])),
1770                ..lr_item_1
1771            })
1772            .add_item(LRItem {
1773                follow: RefCell::new(follow([2, 4])),
1774                ..lr_item_2
1775            });
1776        let mut old_state_1 = old_state.clone();
1777        let settings = Settings::default();
1778        assert!(LRTable::merge_state(
1779            &settings,
1780            &mut old_state_1,
1781            &new_state_1
1782        ));
1783        // When the merge succeeds, verify that the items' follow sets are indeed extended.
1784        assert_eq!(
1785            *old_state_1.items[ItemIndex(0)].follow.borrow(),
1786            follow([1, 3])
1787        );
1788        assert_eq!(
1789            *old_state_1.items[ItemIndex(1)].follow.borrow(),
1790            follow([2, 4])
1791        );
1792
1793        // This merge introduces a new R/R conflict as the second item has 1 in
1794        // its follow set. Term 1 exists in the follow of the first item of the
1795        // old state, so merging would make two items eligible for reduction on
1796        // term 1 in the input.
1797        let new_state_2 = LRState::new(&grammar, 0.into(), 0.into())
1798            .add_item(LRItem {
1799                follow: RefCell::new(follow([3])),
1800                ..lr_item_1
1801            })
1802            .add_item(LRItem {
1803                follow: RefCell::new(follow([2, 1])),
1804                ..lr_item_2
1805            });
1806        let mut old_state_2 = old_state.clone();
1807        assert!(!LRTable::merge_state(
1808            &settings,
1809            &mut old_state_2,
1810            &new_state_2
1811        ));
1812        // Verify that no merge happened
1813        assert_eq!(
1814            *old_state_2.items[ItemIndex(0)].follow.borrow(),
1815            follow([1, 3])
1816        );
1817        assert_eq!(
1818            *old_state_2.items[ItemIndex(1)].follow.borrow(),
1819            follow([2])
1820        );
1821
1822        // The last thing to check is the situation where the new state already
1823        // has R/R conflicts and the merge would not introduce additional ones.
1824        // This time we should merge, as the R/R conflict is not introduced by
1825        // the merge process but exists due to the grammar not being LR(1).
1826        let new_state_3 = LRState::new(&grammar, 0.into(), 0.into())
1827            .add_item(LRItem {
1828                follow: RefCell::new(follow([1, 3])),
1829                ..lr_item_1
1830            })
1831            .add_item(LRItem {
1832                follow: RefCell::new(follow([2, 1])),
1833                ..lr_item_2
1834            });
1835        let mut old_state_3 = old_state.clone();
1836        assert!(LRTable::merge_state(
1837            &settings,
1838            &mut old_state_3,
1839            &new_state_3
1840        ));
1841        // Verify that merge happened
1842        assert_eq!(
1843            *old_state_3.items[ItemIndex(0)].follow.borrow(),
1844            follow([1, 3])
1845        );
1846        assert_eq!(
1847            *old_state_3.items[ItemIndex(1)].follow.borrow(),
1848            follow([2, 1])
1849        );
1850    }
1851
1852    #[test]
1853    fn test_closure() {
1854        let grammar = test_grammar();
1855        let firsts = first_sets(&grammar);
1856
1857        // Create some LR state
1858        let mut lr_state =
1859            LRState::new(&grammar, StateIndex(0), grammar.symbol_index("T")).add_item(
1860                LRItem::with_follow(&grammar, ProdIndex(1), None, follow([grammar.stop_index])),
1861            );
1862
1863        lr_state.closure(&firsts, &None);
1864
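        // With the production ordering of `test_grammar` (1: "E: T Ep",
        // 4: "T: F Tp", 7: "F: ( E )", 8: "F: id"), the closure of the kernel
        // item "E: . T Ep" is expected to add items for the productions of T
        // and, through "T: . F Tp", for F, with the follow sets below.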
1865        let prods = [1, 4, 7, 8];
1866        let follow_sets = [
1867            grammar.symbol_indexes(&["STOP"]),
1868            grammar.symbol_indexes(&["STOP", "Plus"]),
1869            grammar.symbol_indexes(&["STOP", "Plus", "Mul"]),
1870            grammar.symbol_indexes(&["STOP", "Plus", "Mul"]),
1871        ];
1872
1873        assert_eq!(lr_state.items.len(), 4);
1874
1875        itertools::izip!(&lr_state.items, prods, follow_sets).for_each(|(item, prod, follows)| {
1876            assert_eq!(item.prod, prod.into());
1877            assert!(item.follow.borrow().iter().eq(follows.iter()));
1878        });
1879
1880        log!("{:?}", lr_state);
1881    }
1882
1883    #[test]
1884    fn test_lr_states_for_grammar_2() {
1885        let grammar = test_grammar_2();
1886
1887        let settings = Settings::new().table_type(TableType::LALR);
1888
1889        let table = LRTable::new(&grammar, &settings).unwrap();
1890        output_cmp!(
1891            "src/table/grammar_2.expected",
1892            format!("{:#?}", table.states)
1893        );
1894    }
1895
1896    #[test]
1897    fn test_lr_states_for_non_lalr_grammar() {
1898        let grammar = test_non_lalr_grammar();
1899
1900        // Calculating LR tables with the LALR method results in a state with
1901        // R/R conflicts, so a deterministic LR parser cannot be used for this
1902        // grammar with the LALR construction method.
1903        //
1904        // Conflicts are found in state 2, which is entered when 'd' is
1905        // recognized in the input. There are two R/R conflicts, for inputs 'a'
1906        // and 'c'. In both cases the parser may reduce either A or B.
1907        let settings = Settings::new().table_type(TableType::LALR);
1908
1909        let table = LRTable::new(&grammar, &settings).unwrap();
1910
1911        output_cmp!(
1912            "src/table/grammar_nonlalr_lalr.expected",
1913            format!("{grammar}\n\n{:#?}", table.states)
1914        );
1915
1916        // In the LALR_PAGER construction method R/R conflicts are avoided
1917        // during the merge phase: states are kept split if merging would
1918        // introduce a new R/R conflict. This essentially makes LALR_PAGER very
1919        // close in power to canonical LR(1) but with a number of states close
1920        // to that of LALR (i.e. LR(0)).
1921        //
1922        // In this case we have 13 states, while in the previous LALR case
1923        // there were 12.
1924        let settings = Settings::new().table_type(TableType::LALR_PAGER);
1925
1926        let table = LRTable::new(&grammar, &settings).unwrap();
1927
1928        output_cmp!(
1929            "src/table/grammar_nonlalr_lalr_pagerw.expected",
1930            format!("{grammar}\n\n{:#?}", table.states)
1931        );
1932    }
1933
1934    #[test]
1935    fn test_sorted_terminals() {
1936        let grammar: Grammar = r#"
1937            S: A | C | B;
1938            terminals
1939            A: /\d+/;
1940            B: "bb";
1941            C: "c";
1942            "#
1943        .parse()
1944        .unwrap();
1945
1946        let settings = Settings::new().table_type(TableType::LALR_PAGER);
1947
1948        let table = LRTable::new(&grammar, &settings).unwrap();
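        // The expected order [2, 3, 1] should correspond to B ("bb"), C ("c"),
        // A (regex), assuming STOP occupies terminal index 0: presumably string
        // recognizers are tried before the regex, and the longer string first,
        // per the most-specific-match sorting described on
        // `LRState::sorted_terminals`.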
1949        assert_eq!(
1950            &table.states[StateIndex(0)]
1951                .sorted_terminals
1952                .iter()
1953                .map(|i| i.0 .0)
1954                .collect::<Vec<_>>(),
1955            &vec![2, 3, 1]
1956        );
1957    }
1958}