rustemo_compiler/table/
mod.rs

1//! Calculating LR tables
2use std::{
3    cell::RefCell,
4    cmp::{self, Ordering},
5    collections::{BTreeMap, BTreeSet, VecDeque},
6    fmt::{self, Display},
7    iter::{self, repeat},
8    ops::{Index, IndexMut},
9    slice::{Iter, IterMut},
10};
11
12use clap::ValueEnum;
13use colored::Colorize;
14use itertools::{chain, Itertools};
15use rustemo::log;
16
17use crate::{
18    create_index,
19    error::{Error, Result},
20    grammar::{Associativity, Priority, Terminal, DEFAULT_PRIORITY},
21    index::{
22        NonTermIndex, NonTermVec, ProdIndex, ProdVec, StateIndex, StateVec, SymbolIndex,
23        SymbolVec, TermIndex, TermVec,
24    },
25    lang::rustemo_actions::Recognizer,
26    settings::{ParserAlgo, Settings},
27};
28
29use super::grammar::{res_symbol, Grammar};
30
31#[derive(Debug, Clone)]
32pub enum Action {
33    Shift(StateIndex),
34    Reduce(ProdIndex, usize),
35    Accept,
36}
37
38/// Specifies the type of the parsing table used during parsing to decide about
39/// shift/reduce/goto operations.
40#[allow(non_camel_case_types)]
41#[derive(Debug, Default, Clone, PartialEq, Eq, ValueEnum)]
42pub enum TableType {
43    /// Lookahead LR tables. See
44    /// <http://publications.csail.mit.edu/lcs/pubs/pdf/MIT-LCS-TR-065.pdf>
45    LALR,
46    /// A slight modification of LALR tables to prevent some reduce/reduce
47    /// conflicts. Inspired by <https://doi.org/10.1007/BF00290336>
48    #[default]
49    LALR_PAGER,
50    /// LALR tables extended with right-nulled entries. Used for GLR parsing
51    /// (RNGLR). See <https://doi.org/10.1145/1146809.1146810>
52    LALR_RN,
53}
54
55type Firsts = BTreeSet<SymbolIndex>;
56type FirstSets = SymbolVec<Firsts>;
57
58create_index!(ItemIndex, ItemVec);
59
60/// LR State is a set of LR items and a dict of LR automata actions and gotos.
61#[derive(Clone)]
62pub struct LRState<'g> {
63    grammar: &'g Grammar,
64
65    /// The index of this state.
66    pub idx: StateIndex,
67
68    /// The grammar symbol related to this state. Intuitively, the grammar
69    /// symbol seen on a transition to this state. E.g. if the symbol is
70    /// terminal the parser did a Shift operation to enter this state, otherwise
71    /// it did reduce.
72    pub symbol: SymbolIndex,
73
74    /// LR(1) items used to construct this state.
75    items: ItemVec<LRItem>,
76
77    /// A terminal indexed vector of LR actions. Actions instruct LR parser to
78    /// Shift from the input, Reduce the top of the LR stack or accept the
79    /// input. For the deterministic parsing the internal vector of actions can
80    /// contain only one action.
81    pub actions: TermVec<Vec<Action>>,
82
83    /// A non-terminal indexed vector of LR GOTOs. GOTOs represent transitions
84    /// to another state after successful reduction of a non-terminal.
85    pub gotos: NonTermVec<Option<StateIndex>>,
86
87    /// Terminals and finish flags sorted by the priority and most specific
88    /// match (if enabled) for lexical disambiguation. The finish flag, if true,
89    /// indicates that if we already have terminals that matched at this
90    /// location no further termininals should be tried.
91    pub sorted_terminals: Vec<(TermIndex, bool)>,
92
93    /// Each production has a priority. We use this priority to resolve S/R and
94    /// R/R conflicts. Since the Shift operation is executed over terminal symbol
95    /// to resolve S/R we need terminal priority. But, the priority given for a
96    /// terminal directly is used in lexical disambiguation. Instead, we need
97    /// terminal priority inherited from productions. We, say that the priority
98    /// of terminals in S/R resolution will be the priority of the production
99    /// terminal is used in. But, since the same terminal can be used in many
100    /// production we will take the maximum for S/R resolution.
101    max_prior_for_term: BTreeMap<TermIndex, Priority>,
102}
103
104/// Two LR states are equal if they contain the same kernel items.
105impl PartialEq for LRState<'_> {
106    fn eq(&self, other: &Self) -> bool {
107        let self_ki = self.kernel_items();
108        let other_ki = other.kernel_items();
109        self_ki.len() == other_ki.len() && self_ki.iter().zip(other_ki.iter()).all(|(x, y)| x == y)
110    }
111}
112impl Eq for LRState<'_> {}
113
114impl Display for LRState<'_> {
115    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
116        write!(
117            f,
118            "{}\n\t{}",
119            format!(
120                "State {}:{}",
121                self.idx,
122                self.grammar.symbol_name(self.symbol)
123            )
124            .green()
125            .bold(),
126            self.items
127                .iter()
128                .map(|i| i.to_string(self.grammar))
129                .collect::<Vec<_>>()
130                .join("\n\t")
131        )
132    }
133}
134
135impl std::fmt::Debug for LRState<'_> {
136    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
137        f.debug_struct("LRState")
138            .field("idx", &self.idx)
139            .field("symbol", &self.symbol)
140            .field("items", &self.items)
141            .field("actions", &self.actions)
142            .field("gotos", &self.gotos)
143            .field("sorted_terminals", &self.sorted_terminals)
144            .field("max_prior_for_term", &self.max_prior_for_term)
145            .finish()
146    }
147}
148
149impl<'g> LRState<'g> {
150    fn new(grammar: &'g Grammar, index: StateIndex, symbol: SymbolIndex) -> Self {
151        Self {
152            grammar,
153            idx: index,
154            symbol,
155            items: ItemVec::new(),
156            actions: grammar.new_termvec(vec![]),
157            gotos: grammar.new_nontermvec(None),
158            max_prior_for_term: BTreeMap::new(),
159            sorted_terminals: Vec::new(),
160        }
161    }
162
163    fn new_with_items(
164        grammar: &'g Grammar,
165        index: StateIndex,
166        symbol: SymbolIndex,
167        items: ItemVec<LRItem>,
168    ) -> Self {
169        Self {
170            grammar,
171            idx: index,
172            symbol,
173            items,
174            actions: grammar.new_termvec(vec![]),
175            gotos: grammar.new_nontermvec(None),
176            max_prior_for_term: BTreeMap::new(),
177            sorted_terminals: Vec::new(),
178        }
179    }
180
181    fn add_item(mut self, item: LRItem) -> Self {
182        self.items.push(item);
183        self
184    }
185
186    fn kernel_items(&self) -> Vec<&LRItem> {
187        self.items.iter().filter(|i| i.is_kernel()).collect()
188    }
189
190    fn nonkernel_items(&self) -> Vec<&LRItem> {
191        self.items.iter().filter(|i| !i.is_kernel()).collect()
192    }
193
194    /// Closes over LR items of the LRState.
195    ///
196    /// Starting from the given items (usually just kernel items), for each
197    /// item, if right of the dot is a non-terminal, adds all items where LHS is
198    /// a given non-terminal and the dot is at the beginning. In other words,
199    /// adds all missing non-kernel items.
200    fn closure(&mut self, first_sets: &FirstSets, prod_rn_lengths: &Option<ProdVec<usize>>) {
201        loop {
202            // This is OK as Hash uses only non-interior-mutable part of the
203            // LRItem type.
204            #[allow(clippy::mutable_key_type)]
205            let mut new_items: BTreeSet<LRItem> = BTreeSet::new();
206
207            for item in &self.items {
208                if let Some(symbol) = item.symbol_at_position(self.grammar) {
209                    if self.grammar.is_nonterm(symbol) {
210                        let mut new_follow;
211                        // Find first set of substring that follow symbol at
212                        // position
213                        if item.position + 1 < self.grammar.productions[item.prod].rhs.len() {
214                            new_follow = firsts(
215                                self.grammar,
216                                first_sets,
217                                &self.grammar.production_rhs_symbols(item.prod)
218                                    [item.position + 1..],
219                            );
220                            // If symbols that follows the current nonterminal
221                            // can derive EMPTY add follows of current item.
222                            if new_follow.contains(&self.grammar.empty_index) {
223                                new_follow.remove(&self.grammar.empty_index);
224                                new_follow.extend(item.follow.borrow().iter());
225                            }
226                        } else {
227                            // If current item position is at the end add all of
228                            // its follow to the next item.
229                            new_follow = Follow::new();
230                            new_follow.extend(item.follow.borrow().iter());
231                        }
232
233                        // Get all productions of the current non-terminal and
234                        // create LR items with the calculated follow.
235                        let nonterm = self.grammar.symbol_to_nonterm_index(symbol);
236                        for prod in &self.grammar.nonterminals[nonterm].productions {
237                            new_items.insert(LRItem::with_follow(
238                                self.grammar,
239                                *prod,
240                                prod_rn_lengths.as_ref().map(|p| p[*prod]),
241                                new_follow.clone(),
242                            ));
243                        }
244                    }
245                }
246            }
247
248            // Add all new items to state.items. If item is already there update
249            // follow. If there is no change break from the loop.
250            let mut change = false;
251            for new_item in new_items {
252                match self.items.iter_mut().find(|x| *x == &new_item) {
253                    Some(item) => {
254                        // Item already exists, update follows
255                        let l = item.follow.borrow().len();
256                        item.follow
257                            .borrow_mut()
258                            .extend(new_item.follow.borrow().iter());
259                        if item.follow.borrow().len() > l {
260                            change = true;
261                        }
262                    }
263                    None => {
264                        self.items.push(new_item);
265                        change = true;
266                    }
267                }
268            }
269            if !change {
270                break;
271            }
272        }
273    }
274
275    /// Group LR items per grammar symbol right of the dot, and calculate
276    /// terminal max priorities.
277    fn group_per_next_symbol(&mut self) -> BTreeMap<SymbolIndex, Vec<ItemIndex>> {
278        let mut per_next_symbol: BTreeMap<SymbolIndex, Vec<ItemIndex>> = BTreeMap::new();
279
280        for (idx, item) in self.items.iter().enumerate() {
281            let symbol = item.symbol_at_position(self.grammar);
282            if let Some(symbol) = symbol {
283                per_next_symbol.entry(symbol).or_default().push(idx.into());
284                if self.grammar.is_term(symbol) {
285                    let symbol = self.grammar.symbol_to_term_index(symbol);
286                    let prod_prio = self.grammar.productions[item.prod].prio;
287                    self.max_prior_for_term
288                        .entry(symbol)
289                        .and_modify(|v| *v = cmp::max(*v, prod_prio))
290                        .or_insert(prod_prio);
291                }
292            }
293        }
294        per_next_symbol
295    }
296}
297
298/// Represents an item in the items set. Item is defined by a production and a
299/// position inside production (the dot). If the item is of LR_1 type follow set
300/// is also defined. Follow set is a set of terminals that can follow symbol at
301/// the given position in the given production.
302#[derive(Debug, Eq, Clone, PartialOrd, Ord)]
303struct LRItem {
304    prod: ProdIndex,
305
306    /// The length of the production
307    prod_len: usize,
308
309    /// Right-null production length - the last symbol in the production where
310    /// all the following symbols can reduce EMPTY.
311    /// Used in RN-GLR to decide when the item can reduce. `None` for LR.
312    rn_len: Option<usize>,
313
314    /// The position of "the dot" in the RHS of the production
315    position: usize,
316
317    follow: RefCell<Follow>,
318}
319
320impl std::hash::Hash for LRItem {
321    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
322        self.prod.hash(state);
323        self.position.hash(state);
324    }
325}
326
327impl PartialEq for LRItem {
328    fn eq(&self, other: &Self) -> bool {
329        self.prod == other.prod && self.position == other.position
330    }
331}
332
333/// LRItem is a production with a dot in the RHS.
334///
335/// Intuitively, the dot signifies the position where the parsing process is in
336/// a given state. The beginning position is 0, before the first symbol in a
337/// production RHS. The end position is len(RHS), after the last symbol in a
338/// RHS.
339///
340/// LRItem also keeps a set of follow terminals. The item is valid only if the
341/// production is followed by a terminal from the given follow set.
342///
343/// # Example
344///
345/// ```rust
346/// // If prod with index 5 is A: B a C;
347/// let item = LRItem::new(5)
348///                 .next_item().unwrap()
349///                 .next_item().unwrap();
350/// assert_eq(&item.position, 2)
351/// ```
352///
353/// ```text
354/// A: B a . C;
355///        ^
356///        |------ position is 2
357/// ```
358impl LRItem {
359    #[cfg(test)]
360    fn new(grammar: &Grammar, prod: ProdIndex, rn_len: Option<usize>) -> Self {
361        LRItem {
362            prod,
363            prod_len: grammar.production_len(prod),
364            rn_len,
365            position: 0,
366            follow: RefCell::new(Follow::new()),
367        }
368    }
369
370    fn with_follow(
371        grammar: &Grammar,
372        prod: ProdIndex,
373        rn_len: Option<usize>,
374        follow: Follow,
375    ) -> Self {
376        LRItem {
377            prod,
378            prod_len: grammar.production_len(prod),
379            rn_len,
380            position: 0,
381            follow: RefCell::new(follow),
382        }
383    }
384
385    fn symbol_at_position(&self, grammar: &Grammar) -> Option<SymbolIndex> {
386        Some(res_symbol(
387            grammar.productions.get(self.prod)?.rhs.get(self.position)?,
388        ))
389    }
390
391    /// Moves position to the right.
392    #[inline]
393    fn inc_position(mut self) -> Self {
394        assert!(self.position < self.prod_len);
395        self.position += 1;
396        self
397    }
398
399    /// True if this item belongs to the kernel core.
400    ///
401    /// Kernel core items are those where position is not 0 except the augmented
402    /// production which by definition belongs to the core.
403    #[inline]
404    fn is_kernel(&self) -> bool {
405        self.position > 0 || self.prod == ProdIndex(0)
406    }
407
408    #[inline]
409    fn is_reducing(&self) -> bool {
410        self.position == self.prod_len
411            || match self.rn_len {
412                Some(rn_len) => self.position >= rn_len,
413                None => false,
414            }
415    }
416
417    fn to_string(&self, grammar: &Grammar) -> String {
418        let prod = &grammar.productions[self.prod];
419        let mut rhs = prod
420            .rhs_symbols()
421            .iter()
422            .map(|s| grammar.symbol_name(*s))
423            .collect::<Vec<_>>();
424        rhs.insert(self.position, ".".into());
425        format!(
426            "{}: {}: {}    {{{}}}",
427            prod.idx.to_string().green(),
428            grammar
429                .symbol_name(grammar.nonterm_to_symbol_index(prod.nonterminal))
430                .green(),
431            rhs.join(" "),
432            self.follow
433                .borrow()
434                .iter()
435                .map(|f| grammar.symbol_name(*f))
436                .collect::<Vec<_>>()
437                .join(", ")
438        )
439    }
440}
441
442pub enum ConflictKind {
443    ShiftReduce(ProdIndex),
444    ReduceReduce(ProdIndex, ProdIndex),
445}
446
447pub struct Conflict<'g, 's> {
448    state: &'s LRState<'g>,
449    follow: TermIndex,
450    kind: ConflictKind,
451}
452
453impl Display for Conflict<'_, '_> {
454    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
455        write!(f, "")?;
456        Ok(())
457    }
458}
459
460pub struct LRTable<'g, 's> {
461    pub states: StateVec<LRState<'g>>,
462    pub layout_state: Option<StateIndex>,
463    grammar: &'g Grammar,
464    settings: &'s Settings,
465    first_sets: FirstSets,
466
467    /// Right-nulled length of productions. Used in RNGLR. RN length is a
468    /// position in a production after which all the remaining symbols can
469    /// derive EMPTY.
470    ///
471    /// E.g. if there is 3 symbols at the RHS and the last one can derive EMPTY
472    /// but others can't then its rn length is 2. Or, in other words, this
473    /// length is the minimal length of reduction for this production.
474    pub production_rn_lengths: Option<ProdVec<usize>>,
475}
476
477impl<'g, 's> LRTable<'g, 's> {
478    pub fn new(grammar: &'g Grammar, settings: &'s Settings) -> Result<Self> {
479        let first_sets = first_sets(grammar);
480        let production_rn_lengths = match settings.table_type {
481            TableType::LALR_RN => Some(production_rn_lengths(&first_sets, grammar)),
482            TableType::LALR | TableType::LALR_PAGER => None,
483        };
484        let mut table = Self {
485            grammar,
486            settings,
487            states: StateVec::new(),
488            layout_state: None,
489            first_sets,
490            production_rn_lengths,
491        };
492
493        table.check_empty_sets()?;
494
495        table.calc_states(grammar.augmented_index);
496        if let Some(augmented_layout_index) = grammar.augmented_layout_index {
497            table.layout_state = Some(StateIndex(table.states.len()));
498            table.calc_states(augmented_layout_index)
499        }
500
501        log!("LR states constructed. Updating follows.");
502        table.propagate_follows();
503
504        log!(
505            "Calculate REDUCTION entries in ACTION tables and resolve \
506            possible conflicts."
507        );
508        table.calculate_reductions();
509
510        log!("Sort terminals for lexical disambiguation");
511        table.sort_terminals();
512
513        println!("Terminals: {}", grammar.terminals.len());
514        println!("Non-terminals: {}", grammar.nonterminals().len());
515        println!("Productions: {}", grammar.productions().len());
516        println!("States: {}", table.states.len());
517
518        if settings.print_table {
519            println!("LR TABLE:");
520            println!("{table}");
521        }
522
523        Ok(table)
524    }
525
526    /// Calculate LR states with GOTOs and ACTIONs for the given Grammar.
527    ///
528    /// This collection of states is used to generate LR/GLR parser tables.
529    fn calc_states(&mut self, start_symbol: SymbolIndex) {
530        let mut current_state_idx = self.states.len();
531
532        let prods = &self.grammar.symbol_to_nonterm(start_symbol).productions;
533        assert_eq!(prods.len(), 1);
534
535        // Create a state for the first production (augmented)
536        let state = LRState::new(self.grammar, StateIndex(current_state_idx), start_symbol)
537            .add_item(LRItem::with_follow(
538                self.grammar,
539                prods[0],
540                self.production_rn_lengths.as_ref().map(|p| p[prods[0]]),
541                Follow::from([self.grammar.stop_index]),
542            ));
543        current_state_idx += 1;
544
545        // States to be processed.
546        let mut state_queue = VecDeque::from([state]);
547
548        log!("Calculating LR automaton states.");
549        while let Some(mut state) = state_queue.pop_front() {
550            // For each state calculate its closure first, i.e. starting from a so
551            // called "kernel items" expand collection with non-kernel items. We
552            // will also calculate GOTO and ACTIONS dicts for each state. These
553            // dicts will be keyed by a grammar symbol.
554            state.closure(&self.first_sets, &self.production_rn_lengths);
555
556            // To find out other states we examine following grammar symbols in the
557            // current state (symbols following current position/"dot") and group
558            // all items by a grammar symbol.
559            let per_next_symbol = state.group_per_next_symbol();
560
561            // Create accept action if possible.
562            for &symbol in per_next_symbol.keys() {
563                if symbol == self.grammar.stop_index {
564                    state.actions[self.grammar.symbol_to_term_index(symbol)] =
565                        vec![Action::Accept];
566                    break;
567                }
568            }
569
570            // Create new states reachable from the current state.
571            let new_states = Self::create_new_states(self.grammar, &state, per_next_symbol);
572
573            // Find states that already exist and try to merge. If not possible
574            // to merge or not found push state to state queue.
575            for mut new_state in new_states {
576                let target_state_symbol = new_state.symbol;
577                let mut new_state_found = true;
578                let mut target_state_idx = StateIndex(current_state_idx);
579                for old_state in self
580                    .states
581                    .iter_mut()
582                    .chain(state_queue.iter_mut())
583                    .chain(iter::once(&mut state))
584                    .filter(|x| **x == new_state)
585                {
586                    // If the same state already exists try to merge.
587                    if Self::merge_state(self.settings, old_state, &new_state) {
588                        new_state_found = false;
589                        target_state_idx = old_state.idx;
590                        break;
591                    }
592                }
593
594                // Create GOTO for non-terminal or Shift Action for terminal.
595                if self.grammar.is_nonterm(target_state_symbol) {
596                    state.gotos[self.grammar.symbol_to_nonterm_index(target_state_symbol)] =
597                        Some(target_state_idx);
598                } else {
599                    let term = self.grammar.symbol_to_term_index(new_state.symbol);
600                    state.actions[term].push(Action::Shift(target_state_idx));
601                }
602
603                if new_state_found {
604                    // Merge is not possible. Create new state.
605                    new_state.idx = StateIndex(current_state_idx);
606
607                    state_queue.push_back(new_state);
608                    current_state_idx += 1;
609                }
610            }
611
612            self.states.push(state);
613        }
614    }
615
616    /// Try to merge new_state to old_state if possible. If not possible return
617    /// false.
618    ///
619    /// If old state has no R/R conflicts additional check is made and merging is
620    /// not done if it would add R/R conflict.
621    fn merge_state(
622        settings: &Settings,
623        old_state: &mut LRState<'g>,
624        new_state: &LRState<'g>,
625    ) -> bool {
626        // States with different kernel sets cannot be merged.
627        if old_state != new_state {
628            return false;
629        }
630
631        let old_state_items = old_state
632            .items
633            .clone()
634            .into_iter()
635            .filter(|item| item.is_kernel());
636
637        // Item pairs of item from an old state and corresponding item from the new state.
638        let item_pairs: Vec<(&mut LRItem, &LRItem)> = iter::zip(
639            old_state.items.iter_mut().filter(|item| item.is_kernel()),
640            old_state_items.map(|x| new_state.items.iter().find(|&i| *i == x).unwrap()),
641        )
642        .collect();
643
644        if settings.table_type != TableType::LALR {
645            // If this is not pure LALR check to see if merging would introduce R/R.
646            // In case it would, do not merge but keep these states split.
647            //
648            // Menhir's week compatibility test (variant of Pager's weak comp. test).
649            // See Definition 2.29 from the paper:
650            // Denny, Joel E., and Brian A. Malloy. "The IELR (1) algorithm for generating
651            // minimal LR (1) parser tables for non-LR (1) grammars with conflict
652            // resolution." Science of Computer Programming 75.11 (2010): 943-979.
653            for (old, new) in &item_pairs {
654                if !old.is_reducing() {
655                    continue;
656                }
657                for (old_in, new_in) in &item_pairs {
658                    if old == old_in {
659                        continue;
660                    }
661                    // ∀t, at least one of the following is true:
662                    // (a) t ∈ {K1 ∩ K2' } ∧ t ∈ {K2 ∩ K1' }.
663                    // (b) t ∈ {K1 ∩ K2 }.
664                    // (c) t ∈ {K1' ∩ K2' }.
665                    for t in chain(
666                        old.follow.borrow().intersection(&*new_in.follow.borrow()),
667                        old_in.follow.borrow().intersection(&*new.follow.borrow()),
668                    ) {
669                        // Test (b) and (c) conditions
670                        if old
671                            .follow
672                            .borrow()
673                            .intersection(&*old_in.follow.borrow())
674                            .all(|x| x != t)
675                            && new
676                                .follow
677                                .borrow()
678                                .intersection(&*new_in.follow.borrow())
679                                .all(|x| x != t)
680                        {
681                            // We have found terminal which would introduce RR
682                            // conflict if states are merged.
683                            return false;
684                        }
685                    }
686                }
687            }
688        }
689
690        // Do the merge by updating old items follow sets.
691        for (old, new) in item_pairs {
692            old.follow.borrow_mut().extend(new.follow.borrow().iter())
693        }
694        true
695    }
696
697    /// Propagate LR items follows.
698    ///
699    /// This is needed due to state merging. Whenever merge occurs, target state
700    /// follows might get updated so we have to propagate those changes to other
701    /// states.
702    fn propagate_follows(&mut self) {
703        let mut changed = true;
704        while changed {
705            changed = false;
706            for state in self.states.iter_mut() {
707                // Refresh closure to propagate follows from kernel items to
708                // non-kernel of the same state as the merge is done only for kernel
709                // items.
710                state.closure(&self.first_sets, &self.production_rn_lengths);
711            }
712
713            for state in self.states.iter() {
714                // Use GOTOs and ACTIONS to propagate follows between states.
715                state
716                    .gotos
717                    .iter()
718                    .filter_map(|x| x.as_ref())
719                    .chain(state.actions.iter().flat_map(|x| {
720                        x.iter().filter_map(|a| match a {
721                            Action::Shift(state) => Some(state),
722                            _ => None,
723                        })
724                    }))
725                    .for_each(|&target_state| {
726                        for target_item in &mut self.states[target_state]
727                            .items
728                            .iter()
729                            .filter(|x| x.is_kernel())
730                        {
731                            // Find corresponding item in state
732                            if let Some(source_item) = state.items.iter().find(|&x| {
733                                x.prod == target_item.prod
734                                    && x.position == target_item.position - 1
735                            }) {
736                                // Update follow of target item with item from state
737                                let follow_len = target_item.follow.borrow().len();
738                                target_item
739                                    .follow
740                                    .borrow_mut()
741                                    .extend(source_item.follow.borrow().iter());
742
743                                // if target item follow was changed set changed to true
744                                if target_item.follow.borrow().len() > follow_len {
745                                    changed = true
746                                }
747                            }
748                        }
749                    })
750            }
751        }
752    }
753
754    /// Calculate reductions entries in action tables and resolve possible
755    /// conflicts.
756    fn calculate_reductions(&mut self) {
757        let mut aug_symbols = vec![self.grammar.augmented_index];
758        if let Some(layout_index) = self.grammar.augmented_layout_index {
759            aug_symbols.push(layout_index);
760        }
761        for state in &mut self.states {
762            for item in state.items.iter().filter(|x| x.is_reducing()) {
763                let prod = &self.grammar.productions[item.prod];
764
765                // Accept if reducing by augmented productions for STOP
766                // lookahead but do not take into account RN reductions as the
767                // whole AUG production may be right nullable which would
768                // produce an Accept action from the initial state which would
769                // be in conflict with empty reduction.
770                if aug_symbols.contains(&self.grammar.nonterm_to_symbol_index(prod.nonterminal)) {
771                    if item.position == item.prod_len {
772                        let actions = &mut state.actions[TermIndex(0)];
773                        actions.push(Action::Accept);
774                    }
775                    continue;
776                }
777
778                let new_reduce = Action::Reduce(item.prod, item.position);
779                for follow_symbol in item.follow.borrow().iter() {
780                    let follow_term = self.grammar.symbol_to_term(*follow_symbol);
781                    let actions = &mut state.actions[follow_term.idx];
782                    if actions.is_empty() {
783                        // No other action are possible for this follow terminal.
784                        // Just register this reduction.
785                        actions.push(new_reduce.clone());
786                    } else {
787                        // Conflict. Try to resolve.
788                        let (shifts, reduces): (Vec<_>, Vec<_>) = actions
789                            .clone()
790                            .into_iter()
791                            .partition(|x| matches!(x, Action::Shift(_) | Action::Accept));
792                        // Only one SHIFT or ACCEPT might exists for a single
793                        // terminal but many REDUCEs might exist.
794                        assert!(shifts.len() <= 1);
795
796                        let mut should_reduce = true;
797                        if let Some(shift) = shifts.first() {
798                            // Shift/Reduce conflict. Use assoc and priority to
799                            // resolve. For disambiguation treat ACCEPT action the
800                            // same as SHIFT.
801                            let shift_prio = match shift {
802                                Action::Accept => DEFAULT_PRIORITY,
803                                _ => state.max_prior_for_term[&follow_term.idx],
804                            };
805                            match prod.prio.cmp(&shift_prio) {
806                                Ordering::Less => {
807                                    // If priority of existing SHIFT action is
808                                    // higher then leave it instead
809                                    should_reduce = false
810                                }
811                                Ordering::Equal => {
812                                    // If priorities are the same use associativity
813                                    // Terminals associativity has priority over
814                                    // production associativity
815                                    match (&prod.assoc, &follow_term.assoc) {
816                                        (Associativity::Left, Associativity::None)
817                                        | (_, Associativity::Right) => {
818                                            // Override SHIFT with this REDUCE
819                                            assert!(actions.len() == 1);
820                                            actions.pop();
821                                        }
822                                        (Associativity::Right, Associativity::None)
823                                        | (_, Associativity::Left) => {
824                                            // If associativity is right leave SHIFT
825                                            // action as "stronger" and don't consider
826                                            // this reduction any more. Right
827                                            // associative reductions can't be in the
828                                            // same set of actions together with SHIFTs.
829                                            should_reduce = false;
830                                        }
831                                        (Associativity::None, Associativity::None) => {
832                                            // If priorities are the same and no
833                                            // associativity defined use preferred
834                                            // strategy.
835                                            let empty = prod.rhs.is_empty();
836                                            let prod_pse = empty
837                                                && self.settings.prefer_shifts_over_empty
838                                                && !prod.nopse;
839                                            let prod_ps = !empty
840                                                && self.settings.prefer_shifts
841                                                && !prod.nops;
842                                            should_reduce = !(prod_pse || prod_ps);
843                                        }
844                                    }
845                                }
846                                Ordering::Greater => {
847                                    // This item operation priority is higher =>
848                                    // override with reduce
849                                    assert!(actions.len() == 1);
850                                    actions.pop();
851                                }
852                            }
853                        }
854
855                        if should_reduce {
856                            if reduces.is_empty() {
857                                actions.push(new_reduce.clone())
858                            } else {
859                                // REDUCE/REDUCE conflicts.
860                                // Try to resolve using priorities.
861                                let reduces_prio = reduces
862                                    .iter()
863                                    .map(|x| match x {
864                                        Action::Reduce(prod, ..) => {
865                                            self.grammar.productions[*prod].prio
866                                        }
867                                        other => panic!("This should not happen. Got {other:?}"),
868                                    })
869                                    .collect::<Vec<_>>();
870                                if reduces_prio.iter().all(|x| prod.prio < *x) {
871                                    // Current product priority is less than all
872                                    // other reductions. Do not add this reduction.
873                                } else if reduces_prio.iter().all(|x| prod.prio > *x) {
874                                    // Current product priority is greater than all
875                                    // other reductions. This reduction should
876                                    // replace all others.
877                                    actions.retain(|x| !matches!(x, Action::Reduce(..)));
878                                    actions.push(new_reduce.clone())
879                                } else {
880                                    // For LR parsing non-empty reductions are
881                                    // preferred over empty...
882                                    if let ParserAlgo::LR = self.settings.parser_algo {
883                                        // ... so remove all empty reductions.
884                                        actions.retain(
885                                            |x| !matches!(x, Action::Reduce(_, len) if *len == 0),
886                                        );
887
888                                        if item.prod_len > 0 || actions.is_empty() {
889                                            // If current reduction is non-empty add it.
890                                            actions.push(new_reduce.clone())
891                                        }
892                                    } else {
893                                        // This R/R conflict can't be resolved.
894                                        // Just add the new reduction and GLR
895                                        // will handle it by investigating all
896                                        // possibilities.
897                                        actions.push(new_reduce.clone())
898                                    }
899                                }
900                            }
901                        }
902                    }
903                }
904            }
905        }
906    }
907
908    /// Sort terminals for each state according to explicit priority and
909    /// terminal recognizer type. String recognizers have precedence over regex
910    /// recognizers if most specific match strategy is enabled. For most
911    /// specific match, longer string recognizers have precedence over shorter.
912    fn sort_terminals(&mut self) {
913        for state in &mut self.states {
914            let mut terminals = state
915                .actions
916                .iter()
917                .enumerate()
918                .filter(|(_, actions)| !actions.is_empty())
919                .map(|(idx, _)| self.grammar.term_by_index(TermIndex(idx)))
920                .collect::<Vec<_>>();
921
922            let term_prio = |term: &Terminal| -> u32 {
923                term.prio * 1000
924                    + if self.settings.lexical_disamb_most_specific {
925                        match &term.recognizer {
926                            Some(recognizer) => {
927                                (match recognizer {
928                                    Recognizer::StrConst(str_rec) => str_rec.as_ref().len(),
929                                    Recognizer::RegexTerm(_) => 0,
930                                }) as u32
931                            }
932                            None => 0,
933                        }
934                    } else {
935                        0
936                    }
937            };
938            terminals.sort_by(|&l, &r| {
939                let l_term_prio = term_prio(l);
940                let r_term_prio = term_prio(r);
941                r_term_prio.cmp(&l_term_prio)
942            });
943
944            // Calculate "finish" flags
945            let mut sorted_terminals: Vec<(TermIndex, bool)> = vec![];
946            let mut last_prio = None;
947            for terminal in terminals {
948                let finish = self.settings.lexical_disamb_most_specific
949                    && matches!(terminal.recognizer, Some(Recognizer::StrConst(_)));
950                let last_finish = last_prio.is_some_and(|prio| terminal.prio != prio);
951                last_prio = Some(terminal.prio);
952                if let Some(t) = sorted_terminals.last_mut() {
953                    t.1 |= last_finish
954                }
955                sorted_terminals.push((terminal.idx, finish));
956            }
957
958            state.sorted_terminals = sorted_terminals;
959        }
960    }
961
962    /// Create new states that can be reached from the given state.
963    fn create_new_states(
964        grammar: &'g Grammar,
965        state: &LRState,
966        per_next_symbol: BTreeMap<SymbolIndex, Vec<ItemIndex>>,
967    ) -> Vec<LRState<'g>> {
968        let mut states = Vec::new();
969        for (symbol, items) in per_next_symbol {
970            let next_state_items = items
971                .into_iter()
972                .map(|i| state.items[i].clone().inc_position())
973                .collect();
974            states.push(LRState::new_with_items(
975                grammar,
976                StateIndex(0), // Temporary value. The caller will set the real index.
977                symbol,
978                next_state_items,
979            ));
980        }
981        states
982    }
983
984    /// Check for states with GOTO links but without SHIFT links.
985    ///
986    /// This is invalid as GOTO links will never be traversed.
987    fn check_empty_sets(&self) -> Result<()> {
988        if let Some((idx, _)) = self
989            .first_sets
990            .iter()
991            .enumerate()
992            .find(|(_, s)| s.is_empty())
993        {
994            return Err(Error::Error(format!(
995                "First set empty for grammar symbol {:?}.\n\
996                 An infinite recursion on the grammar symbol.",
997                &self.grammar.symbol_name(SymbolIndex(idx))
998            )));
999        }
1000        Ok(())
1001    }
1002
1003    pub fn get_conflicts(&'s self) -> Vec<Conflict<'g, 's>> {
1004        self.states
1005            .iter()
1006            .flat_map(|state| {
1007                state
1008                    .actions
1009                    .iter()
1010                    .enumerate()
1011                    .filter(|(_, actions)| actions.len() > 1)
1012                    .flat_map(|(idx, actions)| {
1013                        actions
1014                            .iter()
1015                            .combinations(2)
1016                            .map(move |conflict| (idx, conflict))
1017                    })
1018                    .map(|(term_index, conflict)| {
1019                        let kind = match &conflict[..] {
1020                            [Action::Shift(_), Action::Reduce(prod, _)]
1021                            | [Action::Reduce(prod, _), Action::Shift(_)] => {
1022                                ConflictKind::ShiftReduce(*prod)
1023                            }
1024                            [Action::Reduce(prod1, _), Action::Reduce(prod2, _)] => {
1025                                ConflictKind::ReduceReduce(*prod1, *prod2)
1026                            }
1027                            e => {
1028                                // This cannot happen as we have combinations of size 2.
1029                                eprint!("{e:?}");
1030                                unreachable!()
1031                            }
1032                        };
1033                        Conflict {
1034                            state,
1035                            follow: TermIndex(term_index),
1036                            kind,
1037                        }
1038                    })
1039            })
1040            .collect()
1041    }
1042
1043    pub fn print_conflicts_report(&self, conflicts: &Vec<Conflict<'g, 's>>) {
1044        for conflict in conflicts {
1045            println!("{} {}", "In".green().bold(), conflict.state);
1046            print!(
1047                "When I saw {} and see token {} ahead I can't decide",
1048                self.grammar.symbol_name(conflict.state.symbol).green(),
1049                self.grammar
1050                    .symbol_name(self.grammar.term_to_symbol_index(conflict.follow))
1051                    .green()
1052            );
1053            match conflict.kind {
1054                ConflictKind::ShiftReduce(prod) => {
1055                    println!(
1056                        " should I shift or reduce by production:\n{}\n",
1057                        self.grammar.productions[prod]
1058                            .to_string(self.grammar)
1059                            .green()
1060                    );
1061                }
1062                ConflictKind::ReduceReduce(prod1, prod2) => {
1063                    println!(
1064                        " should I reduce by production:\n{}\nor production:\n{}\n",
1065                        self.grammar.productions[prod1]
1066                            .to_string(self.grammar)
1067                            .green(),
1068                        self.grammar.productions[prod2]
1069                            .to_string(self.grammar)
1070                            .green()
1071                    );
1072                }
1073            }
1074        }
1075        let shift_reduce_len = conflicts
1076            .iter()
1077            .filter(|c| matches!(c.kind, ConflictKind::ShiftReduce(..)))
1078            .count();
1079        let reduce_reduce_len = conflicts
1080            .iter()
1081            .filter(|c| matches!(c.kind, ConflictKind::ReduceReduce(..)))
1082            .count();
1083        println!(
1084            "{}",
1085            format!(
1086                "{} conflict(s). {} Shift/Reduce and {} Reduce/Reduce.",
1087                shift_reduce_len + reduce_reduce_len,
1088                shift_reduce_len,
1089                reduce_reduce_len
1090            )
1091            .green()
1092        );
1093    }
1094
1095    /// Maximal number of actions per state/token. For LR can't be >1.
1096    #[inline]
1097    pub fn max_actions(&self) -> usize {
1098        self.states
1099            .iter()
1100            .map(|state| state.actions.iter().map(|a| a.len()).max().unwrap_or(0))
1101            .max()
1102            .unwrap()
1103    }
1104
1105    #[inline]
1106    pub fn max_recognizers(&self) -> usize {
1107        self.states
1108            .iter()
1109            .map(|state| state.actions.iter().filter(|a| !a.is_empty()).count())
1110            .max()
1111            .unwrap()
1112    }
1113
1114    pub fn to_dot(&self) -> String {
1115        let mut dot = String::from(
1116            r#"
1117            digraph grammar {
1118            rankdir=LR
1119            fontname = "Bitstream Vera Sans"
1120            fontsize = 8
1121            node[
1122                shape=record,
1123                style=filled,
1124                fillcolor=aliceblue
1125            ]
1126            nodesep = 0.3
1127            edge[dir=black,arrowtail=empty]
1128
1129        "#,
1130        );
1131
1132        let dot_escape = |s: &String| {
1133            s.replace('\n', r"\n")
1134                .replace('\\', "\\\\")
1135                .replace('"', r#"\""#)
1136                .replace('|', r"\|")
1137                .replace('{', r"\{")
1138                .replace('}', r"\}")
1139                .replace('>', r"\>")
1140                .replace('<', r"\<")
1141                .replace('?', r"\?")
1142        };
1143
1144        colored::control::set_override(false);
1145
1146        let conflicts = self.get_conflicts();
1147        let is_conflict_state = |state: &LRState| conflicts.iter().any(|s| s.state == state);
1148
1149        for state in &self.states {
1150            let mut kernel_items_str = String::new();
1151            for item in state.kernel_items() {
1152                kernel_items_str += &format!("{}\\l", dot_escape(&item.to_string(self.grammar)));
1153            }
1154
1155            let nonkernel_items = state.nonkernel_items();
1156            let mut nonkernel_items_str = if !nonkernel_items.is_empty() {
1157                String::from("|")
1158            } else {
1159                String::new()
1160            };
1161
1162            for item in nonkernel_items {
1163                nonkernel_items_str +=
1164                    &format!("{}\\l", dot_escape(&item.to_string(self.grammar)));
1165            }
1166
1167            let mut reductions: Vec<String> = vec![];
1168            for term in &self.grammar.terminals {
1169                let mut term_reduction_prods: Vec<String> = vec![];
1170                for action in &state.actions[term.idx] {
1171                    match action {
1172                        Action::Shift(target_state_idx) => {
1173                            dot += &format!(
1174                                "{} -> {} [label=\"SHIFT:{}\"]\n",
1175                                state.idx, target_state_idx, term.name
1176                            )
1177                        }
1178                        Action::Reduce(prod_idx, len) => {
1179                            term_reduction_prods.push(format!("({prod_idx},{len})"));
1180                        }
1181                        Action::Accept => {
1182                            dot += &format!("{} -> ACCEPT [label=\"{}\"]\n", state.idx, term.name)
1183                        }
1184                    }
1185                }
1186                if !term_reduction_prods.is_empty() {
1187                    let r = term_reduction_prods.join(", ");
1188                    reductions.push(if term_reduction_prods.len() > 1 {
1189                        format!("{}:[{}]", dot_escape(&term.name), r)
1190                    } else {
1191                        format!("{}:{}", dot_escape(&term.name), r)
1192                    });
1193                }
1194            }
1195            let reductions = if !reductions.is_empty() {
1196                format!("|Reductions:\\l{}", reductions.join(", "))
1197            } else {
1198                String::new()
1199            };
1200
1201            dot += &format!(
1202                "{} [label=\"{}|{}{}{}\"{}]\n",
1203                state.idx,
1204                dot_escape(&format!(
1205                    "{}:{}",
1206                    state.idx,
1207                    self.grammar.symbol_name(state.symbol)
1208                )),
1209                kernel_items_str,
1210                nonkernel_items_str,
1211                reductions,
1212                if is_conflict_state(state) {
1213                    ", fillcolor=\"lightpink\""
1214                } else {
1215                    ""
1216                }
1217            );
1218
1219            // GOTOs
1220            for nonterm in &self.grammar.nonterminals {
1221                if let Some(target_state_idx) = state.gotos[nonterm.idx] {
1222                    dot += &format!(
1223                        "{} -> {} [label=\"GOTO:{}\"]\n",
1224                        state.idx, target_state_idx, nonterm.name
1225                    )
1226                }
1227            }
1228        }
1229        colored::control::unset_override();
1230
1231        dot += "\n}\n";
1232        dot
1233    }
1234}
1235
1236fn production_rn_lengths(
1237    first_sets: &SymbolVec<BTreeSet<SymbolIndex>>,
1238    grammar: &Grammar,
1239) -> ProdVec<usize> {
1240    let mut prod_rn_lens = ProdVec::new();
1241    for production in &grammar.productions {
1242        let mut rn_len = production.rhs.len();
1243        for symbol in production.rhs_symbols().iter().rev() {
1244            if first_sets[*symbol].contains(&grammar.empty_index) {
1245                rn_len -= 1;
1246            } else {
1247                break;
1248            }
1249        }
1250        prod_rn_lens.push(rn_len)
1251    }
1252    prod_rn_lens
1253}
1254
1255impl Display for LRTable<'_, '_> {
1256    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1257        for state in &self.states {
1258            writeln!(
1259                f,
1260                "\n{}",
1261                format!(
1262                    "State {}:{}",
1263                    state.idx,
1264                    self.grammar.symbol_name(state.symbol)
1265                )
1266                .green(),
1267            )?;
1268            for item in &state.items {
1269                writeln!(f, "\t{}", item.to_string(self.grammar))?;
1270            }
1271            let actions = state
1272                .actions
1273                .iter()
1274                .enumerate()
1275                .filter(|(_, a)| !a.is_empty())
1276                .flat_map(|(i, a)| repeat(i).zip(a.iter()))
1277                .map(|(i, a)| {
1278                    (
1279                        self.grammar.terminals[TermIndex(i)].name.clone(),
1280                        match a {
1281                            Action::Shift(s) => format!("Shift to {s}"),
1282                            Action::Reduce(p, l) => {
1283                                format!(
1284                                    "Reduce for len {l} by:   {}",
1285                                    self.grammar.productions[*p].to_string(self.grammar)
1286                                )
1287                            }
1288                            Action::Accept => "Accept".into(),
1289                        },
1290                    )
1291                })
1292                .collect::<Vec<_>>();
1293
1294            if !actions.is_empty() {
1295                writeln!(f, "{}", "\tACTIONS:".green())?;
1296                for (term, action) in actions {
1297                    writeln!(f, "\t\t{term} {} {action}", "=>".green())?;
1298                }
1299            }
1300
1301            // GOTOs
1302            let gotos = state
1303                .gotos
1304                .iter()
1305                .enumerate()
1306                .filter(|(_, g)| g.is_some())
1307                .map(|(idx, g)| {
1308                    let g = g.unwrap();
1309                    (
1310                        self.grammar.nonterminals[NonTermIndex(idx)].name.clone(),
1311                        format!(
1312                            "State {}:{}",
1313                            self.grammar.symbol_name(self.states[g].symbol),
1314                            g.0
1315                        ),
1316                    )
1317                })
1318                .collect::<Vec<_>>();
1319
1320            if !gotos.is_empty() {
1321                writeln!(f, "{}", "\tGOTOs:".green())?;
1322                for (nonterm, goto) in gotos {
1323                    writeln!(f, "\t\t{nonterm} {} {goto}", "=>".green())?;
1324                }
1325            }
1326        }
1327        Ok(())
1328    }
1329}
1330
1331/// Calculates the sets of terminals that can start the sentence derived from all
1332/// grammar symbols.
1333///
1334/// The Dragon book p. 221.
1335fn first_sets(grammar: &Grammar) -> FirstSets {
1336    let mut first_sets = SymbolVec::new();
1337
1338    // First set for each terminal contains only the terminal itself.
1339    for terminal in &grammar.terminals {
1340        let mut new_set = Firsts::new();
1341        new_set.insert(terminal.idx.symbol_index());
1342        first_sets.push(new_set);
1343    }
1344
1345    // Initialize empty sets for nonterminals
1346    grammar
1347        .nonterminals
1348        .iter()
1349        .for_each(|_| first_sets.push(Firsts::new()));
1350
1351    // EMPTY derives EMPTY
1352    first_sets[grammar.empty_index].insert(grammar.empty_index);
1353
1354    let mut additions = true;
1355    while additions {
1356        additions = false;
1357        for production in &grammar.productions {
1358            let lhs_nonterm = grammar.nonterm_to_symbol_index(production.nonterminal);
1359
1360            let rhs_firsts = firsts(grammar, &first_sets, &production.rhs_symbols());
1361
1362            let lhs_len = first_sets[lhs_nonterm].len();
1363
1364            first_sets[lhs_nonterm].extend(rhs_firsts);
1365
1366            // Check if any addition is actually performed.
1367            if lhs_len < first_sets[lhs_nonterm].len() {
1368                additions = true
1369            }
1370        }
1371    }
1372    first_sets
1373}
1374
1375/// For the given sequence of symbols finds a set of FIRST terminals.
1376///
1377/// Finds all terminals which can start the given sequence of symbols. Note that
1378/// if all symbols in the sequence can derive EMPTY, EMPTY will be a part of the
1379/// returned set.
1380fn firsts(grammar: &Grammar, first_sets: &FirstSets, symbols: &[SymbolIndex]) -> Firsts {
1381    let mut firsts = Firsts::new();
1382    let mut break_out = false;
1383    for &symbol in symbols {
1384        let symbol_firsts = &first_sets[symbol];
1385        let mut empty = false;
1386
1387        for first in symbol_firsts {
1388            if *first == grammar.empty_index {
1389                empty = true;
1390            } else {
1391                firsts.insert(*first);
1392            }
1393        }
1394
1395        // We should proceed to the next symbol in sequence only if the current
1396        // symbol can produce EMPTY.
1397        if !empty {
1398            break_out = true;
1399            break;
1400        }
1401    }
1402    if !break_out {
1403        // If we reached the end of symbol sequence and each symbol along the
1404        // way could derive EMPTY than we must add EMPTY to the firsts.
1405        firsts.insert(grammar.empty_index);
1406    }
1407    firsts
1408}
1409
1410/// Calculates the sets of terminals that can follow some non-terminal for the
1411/// given grammar.
1412///
1413/// The dragon book p.221
1414/// Currently unused
1415type Follow = BTreeSet<SymbolIndex>;
1416#[allow(dead_code)]
1417type FollowSets = SymbolVec<Follow>;
1418#[cfg(test)]
1419fn follow_sets(grammar: &Grammar, first_sets: &FirstSets) -> FollowSets {
1420    let mut follow_sets = FollowSets::new();
1421    for _ in 0..first_sets.len() {
1422        follow_sets.push(Follow::new());
1423    }
1424
1425    // Rule 1: Place $ in FOLLOW(S), where S is the start symbol, and $ is
1426    // the input right endmarker.
1427    follow_sets[grammar.augmented_index].insert(grammar.stop_index);
1428
1429    let mut additions = true;
1430    while additions {
1431        additions = false;
1432        for production in &grammar.productions {
1433            let lhs_symbol = grammar.nonterm_to_symbol_index(production.nonterminal);
1434
1435            // Rule 2: If there is a production A -> α B β then everything in
1436            // FIRST(β) except EMPTY is in FOLLOW(B).
1437            for idx in 0..production.rhs.len() {
1438                let rhs_symbol = production.rhs_symbol(idx);
1439                let elements = follow_sets[rhs_symbol].len();
1440                let mut break_out = false;
1441                for rnext in &production.rhs[idx + 1..] {
1442                    let follow_symbols = &first_sets[res_symbol(rnext)];
1443
1444                    follow_sets[rhs_symbol]
1445                        .extend(follow_symbols.iter().filter(|&&s| s != grammar.empty_index));
1446
1447                    if !follow_symbols.contains(&grammar.empty_index) {
1448                        break_out = true;
1449                        break;
1450                    }
1451                }
1452
1453                if !break_out {
1454                    // Rule 3: If all symbols right of current RHS produce EMPTY
1455                    // then this RHS symbol must contain all what follows symbol
1456                    // at LHS.
1457                    let lhs_follows: Follow = follow_sets[lhs_symbol].iter().copied().collect();
1458                    follow_sets[rhs_symbol].extend(lhs_follows.iter());
1459                }
1460
1461                if follow_sets[rhs_symbol].len() > elements {
1462                    additions = true
1463                }
1464            }
1465        }
1466    }
1467    follow_sets
1468}
1469
1470#[cfg(test)]
1471mod tests {
1472
1473    use std::cell::RefCell;
1474    use std::collections::BTreeSet;
1475
1476    use crate::index::{ProdIndex, StateIndex, SymbolIndex};
1477    use crate::table::{first_sets, follow_sets, ItemIndex, LRTable, TableType};
1478    use crate::{
1479        grammar::Grammar,
1480        output_cmp,
1481        settings::Settings,
1482        table::{Follow, LRItem},
1483    };
1484
1485    use super::{production_rn_lengths, LRState};
1486
1487    fn follow<T, I>(indexes: T) -> BTreeSet<SymbolIndex>
1488    where
1489        T: IntoIterator<Item = I>,
1490        I: Into<SymbolIndex>,
1491    {
1492        indexes.into_iter().map(|i| i.into()).collect()
1493    }
1494
1495    fn test_grammar() -> Grammar {
1496        r#"
1497        E: T Ep;
1498        Ep: "+" T Ep | EMPTY;
1499        T: F Tp;
1500        Tp: "*" F Tp | EMPTY;
1501        F: "(" E ")" | "id";
1502
1503        terminals
1504        Plus: "+";
1505        Mul: "*";
1506        LParen: "(";
1507        RParen: ")";
1508        id: "id";
1509        "#
1510        .parse()
1511        .unwrap()
1512    }
1513
1514    fn test_grammar_2() -> Grammar {
1515        r#"
1516        E: E "+" T | T;
1517        T: T "*" F | F;
1518        F: "(" E ")" | "id";
1519
1520        terminals
1521        Plus: "+";
1522        Mul: "*";
1523        LParen: "(";
1524        RParen: ")";
1525        id: "id";
1526        "#
1527        .parse()
1528        .unwrap()
1529    }
1530
1531    /// Grammar from the Dragon book, p.278
1532    /// This grammar is LR(1) but not LALR.
1533    /// See also: https://www.gnu.org/software/bison/manual/bison.html#Mysterious-Conflicts
1534    fn test_non_lalr_grammar() -> Grammar {
1535        r#"
1536        S: A "a" | "b" A "c" | B "c" | "b" B "a";
1537        A: "d";
1538        B: "d";
1539        terminals
1540        a_t: "a";
1541        b_t: "b";
1542        c_t: "c";
1543        d_t: "d";
1544        "#
1545        .parse()
1546        .unwrap()
1547    }
1548
1549    fn test_ambiguous_grammar() -> Grammar {
1550        r#"
1551        E: E "+" E {1, left}
1552            | E "*" E {2, left}
1553            | E "^" E {3, right}
1554            | "(" E ")"
1555            | "id";
1556
1557        terminals
1558        Plus: "+";
1559        Mul: "*";
1560        Power: "^";
1561        LParen: "(";
1562        RParen: ")";
1563        id: "id";
1564        "#
1565        .parse()
1566        .unwrap()
1567    }
1568
1569    #[test]
1570    fn test_first_sets() {
1571        let grammar = test_grammar();
1572        let first_sets = first_sets(&grammar);
1573
1574        assert_eq!(first_sets.len(), 13);
1575
1576        // First of terminal is just a terminal itself.
1577        assert_eq!(
1578            &first_sets[grammar.symbol_index("id")],
1579            &follow(grammar.symbol_indexes(&["id"]))
1580        );
1581
1582        assert_eq!(
1583            &first_sets[grammar.symbol_index("F")],
1584            &follow(grammar.symbol_indexes(&["LParen", "id"]))
1585        );
1586        assert_eq!(
1587            &first_sets[grammar.symbol_index("T")],
1588            &follow(grammar.symbol_indexes(&["LParen", "id"]))
1589        );
1590        assert_eq!(
1591            &first_sets[grammar.symbol_index("E")],
1592            &follow(grammar.symbol_indexes(&["LParen", "id"]))
1593        );
1594        assert_eq!(
1595            &first_sets[grammar.symbol_index("Ep")],
1596            &follow(grammar.symbol_indexes(&["Plus", "EMPTY"]))
1597        );
1598        assert_eq!(
1599            &first_sets[grammar.symbol_index("Tp")],
1600            &follow(grammar.symbol_indexes(&["Mul", "EMPTY"]))
1601        );
1602    }
1603
1604    #[test]
1605    fn test_follow_sets() {
1606        let grammar = test_grammar();
1607        let follow_sets = follow_sets(&grammar, &first_sets(&grammar));
1608
1609        assert_eq!(
1610            &follow_sets[grammar.symbol_index("E")],
1611            &follow(grammar.symbol_indexes(&["RParen", "STOP"]))
1612        );
1613        assert_eq!(
1614            &follow_sets[grammar.symbol_index("Ep")],
1615            &follow(grammar.symbol_indexes(&["RParen", "STOP"]))
1616        );
1617        assert_eq!(
1618            &follow_sets[grammar.symbol_index("T")],
1619            &follow(grammar.symbol_indexes(&["Plus", "RParen", "STOP"]))
1620        );
1621        assert_eq!(
1622            &follow_sets[grammar.symbol_index("Tp")],
1623            &follow(grammar.symbol_indexes(&["Plus", "RParen", "STOP"]))
1624        );
1625    }
1626
1627    #[test]
1628    fn test_prooduction_rn_lengths() {
1629        let grammar = test_grammar();
1630        let first_sets = first_sets(&grammar);
1631
1632        let production_rn_lengths = production_rn_lengths(&first_sets, &grammar);
1633
1634        // RN length of "E: T Ep" is 1 as "Ep" can derive EMPTY but "T" can't.
1635        assert_eq!(production_rn_lengths[ProdIndex(1)], 1);
1636
1637        // RN length of "Ep: "+" T Ep" is 2.
1638        assert_eq!(production_rn_lengths[ProdIndex(2)], 2);
1639    }
1640
1641    #[test]
1642    fn test_symbol_at_position() {
1643        let grammar = test_grammar();
1644
1645        let prod = ProdIndex(1);
1646        let mut item = LRItem::new(&grammar, prod, None);
1647        assert_eq!(
1648            &grammar.symbol_names(grammar.productions[prod].rhs_symbols()),
1649            &["T", "Ep"]
1650        );
1651        assert_eq!(
1652            item.symbol_at_position(&grammar).unwrap(),
1653            grammar.symbol_index("T")
1654        );
1655        item.position = 1;
1656        assert_eq!(
1657            &grammar.symbol_name(item.symbol_at_position(&grammar).unwrap()),
1658            "Ep"
1659        );
1660        item.position = 2;
1661        assert!(item.symbol_at_position(&grammar).is_none());
1662        item.position = 3;
1663        assert!(item.symbol_at_position(&grammar).is_none());
1664    }
1665
1666    #[test]
1667    fn test_group_per_next_symbol() {
1668        let grammar = test_ambiguous_grammar();
1669
1670        // Create some LR state
1671        let mut lr_state = LRState::new(&grammar, 0.into(), grammar.symbol_index("E"))
1672            .add_item(LRItem {
1673                prod: 1.into(),
1674                prod_len: grammar.production_len(1.into()),
1675                rn_len: None,
1676                position: 1,
1677                follow: RefCell::new(Follow::new()),
1678            })
1679            .add_item(LRItem {
1680                prod: 2.into(),
1681                prod_len: grammar.production_len(2.into()),
1682                rn_len: None,
1683                position: 1,
1684                follow: RefCell::new(Follow::new()),
1685            })
1686            .add_item(LRItem {
1687                prod: 3.into(),
1688                prod_len: grammar.production_len(2.into()),
1689                rn_len: None,
1690                position: 1,
1691                follow: RefCell::new(Follow::new()),
1692            })
1693            .add_item(LRItem {
1694                prod: 4.into(),
1695                prod_len: grammar.production_len(3.into()),
1696                rn_len: None,
1697                position: 2,
1698                follow: RefCell::new(Follow::new()),
1699            });
1700
1701        let per_next_symbol = lr_state.group_per_next_symbol();
1702
1703        // log!("Symbols: {:#?}", grammar.symbol_names(per_next_symbol.keys()));
1704        // Symbols: ["+", "*", ")"]
1705        //log!("Pernext: {:?}", per_next_symbol);
1706        // Pernext: {SymbolIndex(1): [ItemIndex(0)], SymbolIndex(2): [ItemIndex(1)], SymbolIndex(4): [ItemIndex(2)]}
1707
1708        // Check items grouping per symbol
1709        assert_eq!(per_next_symbol.len(), 4);
1710        assert_eq!(
1711            per_next_symbol.keys().cloned().collect::<Vec<_>>(),
1712            [1, 2, 3, 5]
1713                .iter()
1714                .map(|v| SymbolIndex(*v))
1715                .collect::<Vec<_>>()
1716        );
1717        assert_eq!(
1718            per_next_symbol.values().cloned().collect::<Vec<_>>(),
1719            vec![
1720                vec![0.into()],
1721                vec![1.into()],
1722                vec![2.into()],
1723                vec![3.into()]
1724            ]
1725        );
1726
1727        // Check production based term priorities
1728        assert_eq!(
1729            lr_state.max_prior_for_term
1730                [&grammar.symbol_to_term_index(grammar.term_by_name["Power"])],
1731            3
1732        );
1733        assert_eq!(
1734            lr_state.max_prior_for_term
1735                [&grammar.symbol_to_term_index(grammar.term_by_name["Mul"])],
1736            2
1737        );
1738        assert_eq!(
1739            lr_state.max_prior_for_term
1740                [&grammar.symbol_to_term_index(grammar.term_by_name["Plus"])],
1741            1
1742        );
1743    }
1744
1745    #[test]
1746    fn test_merge_states() {
1747        let grammar = test_grammar();
1748        let lr_item_1 = LRItem {
1749            prod: ProdIndex(1),
1750            prod_len: 2,
1751            rn_len: None,
1752            position: 2,
1753            follow: RefCell::new(Follow::new()),
1754        };
1755        let lr_item_2 = LRItem {
1756            prod: ProdIndex(2),
1757            prod_len: 3,
1758            rn_len: None,
1759            position: 3,
1760            follow: RefCell::new(Follow::new()),
1761        };
1762        let old_state = LRState::new(&grammar, 0.into(), 0.into())
1763            .add_item(LRItem {
1764                follow: RefCell::new(follow([1, 3])),
1765                ..lr_item_1
1766            })
1767            .add_item(LRItem {
1768                follow: RefCell::new(follow([2])),
1769                ..lr_item_2
1770            });
1771
1772        // This should be merged as there are no introduced R/R conflicts
1773        let new_state_1 = LRState::new(&grammar, 0.into(), 0.into())
1774            .add_item(LRItem {
1775                follow: RefCell::new(follow([1])),
1776                ..lr_item_1
1777            })
1778            .add_item(LRItem {
1779                follow: RefCell::new(follow([2, 4])),
1780                ..lr_item_2
1781            });
1782        let mut old_state_1 = old_state.clone();
1783        let settings = Settings::default();
1784        assert!(LRTable::merge_state(
1785            &settings,
1786            &mut old_state_1,
1787            &new_state_1
1788        ));
1789        // When the merge succeed verify that items follows are indeed extended.
1790        assert_eq!(
1791            *old_state_1.items[ItemIndex(0)].follow.borrow(),
1792            follow([1, 3])
1793        );
1794        assert_eq!(
1795            *old_state_1.items[ItemIndex(1)].follow.borrow(),
1796            follow([2, 4])
1797        );
1798
1799        // This merge introduces new R/R conflict as the second item has 1 in
1800        // the follow set. Term 1 exists in the first item of the old state so
1801        // merging will make two items eligible for reduction on the term 1 in
1802        // the input.
1803        let new_state_2 = LRState::new(&grammar, 0.into(), 0.into())
1804            .add_item(LRItem {
1805                follow: RefCell::new(follow([3])),
1806                ..lr_item_1
1807            })
1808            .add_item(LRItem {
1809                follow: RefCell::new(follow([2, 1])),
1810                ..lr_item_2
1811            });
1812        let mut old_state_2 = old_state.clone();
1813        assert!(!LRTable::merge_state(
1814            &settings,
1815            &mut old_state_2,
1816            &new_state_2
1817        ));
1818        // Verify that no merge happened
1819        assert_eq!(
1820            *old_state_2.items[ItemIndex(0)].follow.borrow(),
1821            follow([1, 3])
1822        );
1823        assert_eq!(
1824            *old_state_2.items[ItemIndex(1)].follow.borrow(),
1825            follow([2])
1826        );
1827
1828        // The last thing to check is situation where new state has R/R
1829        // conflicts and there are no additional merge introduced R/R conflicts.
1830        // This time we should merge as the R/R conflict is not introduced by
1831        // merge process but exists due to the grammar not being LR(1).
1832        let new_state_3 = LRState::new(&grammar, 0.into(), 0.into())
1833            .add_item(LRItem {
1834                follow: RefCell::new(follow([1, 3])),
1835                ..lr_item_1
1836            })
1837            .add_item(LRItem {
1838                follow: RefCell::new(follow([2, 1])),
1839                ..lr_item_2
1840            });
1841        let mut old_state_3 = old_state.clone();
1842        assert!(LRTable::merge_state(
1843            &settings,
1844            &mut old_state_3,
1845            &new_state_3
1846        ));
1847        // Verify that merge happened
1848        assert_eq!(
1849            *old_state_3.items[ItemIndex(0)].follow.borrow(),
1850            follow([1, 3])
1851        );
1852        assert_eq!(
1853            *old_state_3.items[ItemIndex(1)].follow.borrow(),
1854            follow([2, 1])
1855        );
1856    }
1857
1858    #[test]
1859    fn test_closure() {
1860        let grammar = test_grammar();
1861        let firsts = first_sets(&grammar);
1862
1863        // Create some LR state
1864        let mut lr_state =
1865            LRState::new(&grammar, StateIndex(0), grammar.symbol_index("T")).add_item(
1866                LRItem::with_follow(&grammar, ProdIndex(1), None, follow([grammar.stop_index])),
1867            );
1868
1869        lr_state.closure(&firsts, &None);
1870
1871        let prods = [1, 4, 7, 8];
1872        let follow_sets = [
1873            grammar.symbol_indexes(&["STOP"]),
1874            grammar.symbol_indexes(&["STOP", "Plus"]),
1875            grammar.symbol_indexes(&["STOP", "Plus", "Mul"]),
1876            grammar.symbol_indexes(&["STOP", "Plus", "Mul"]),
1877        ];
1878
1879        assert_eq!(lr_state.items.len(), 4);
1880
1881        itertools::izip!(&lr_state.items, prods, follow_sets).for_each(|(item, prod, follows)| {
1882            assert_eq!(item.prod, prod.into());
1883            assert!(item.follow.borrow().iter().eq(follows.iter()));
1884        });
1885
1886        log!("{:?}", lr_state);
1887    }
1888
1889    #[test]
1890    fn test_lr_states_for_grammar_2() {
1891        let grammar = test_grammar_2();
1892
1893        let settings = Settings::new().table_type(TableType::LALR);
1894
1895        let table = LRTable::new(&grammar, &settings).unwrap();
1896        output_cmp!(
1897            "src/table/grammar_2.expected",
1898            format!("{:#?}", table.states)
1899        );
1900    }
1901
1902    #[test]
1903    fn test_lr_states_for_non_lalr_grammar() {
1904        let grammar = test_non_lalr_grammar();
1905
1906        // Calculating LR tables with LALR method will result in a state with
1907        // R/R conflicts. So, deterministic LR parsing method cannot be used for
1908        // this grammar and LALR construction method.
1909        //
1910        // Conflicts are found in state 2 which is entered when 'd' is
1911        // recognized in the input. There are two R/R conflicts, for inputs 'a'
1912        // and 'c'. In both case parser may reduce both A and B.
1913        let settings = Settings::new().table_type(TableType::LALR);
1914
1915        let table = LRTable::new(&grammar, &settings).unwrap();
1916
1917        output_cmp!(
1918            "src/table/grammar_nonlalr_lalr.expected",
1919            format!("{grammar}\n\n{:#?}", table.states)
1920        );
1921
1922        // In LALR_PAGER construction method R/R conflicts are avoided during
1923        // merge phase where states are kept split if merging would introduce
1924        // new R/R conflict. This essentially makes LALR_PAGER very close in
1925        // power to canonical LR(1) but with the number of states which is
1926        // almost like in LALR (i.e. LR(0)).
1927        //
1928        // In this case we have 13 states while in previous LALR case there was
1929        // 12 states.
1930        let settings = Settings::new().table_type(TableType::LALR_PAGER);
1931
1932        let table = LRTable::new(&grammar, &settings).unwrap();
1933
1934        output_cmp!(
1935            "src/table/grammar_nonlalr_lalr_pagerw.expected",
1936            format!("{grammar}\n\n{:#?}", table.states)
1937        );
1938    }
1939
1940    #[test]
1941    fn test_sorted_terminals() {
1942        let grammar: Grammar = r#"
1943            S: A | C | B;
1944            terminals
1945            A: /\d+/;
1946            B: "bb";
1947            C: "c";
1948            "#
1949        .parse()
1950        .unwrap();
1951
1952        let settings = Settings::new().table_type(TableType::LALR_PAGER);
1953
1954        let table = LRTable::new(&grammar, &settings).unwrap();
1955        assert_eq!(
1956            &table.states[StateIndex(0)]
1957                .sorted_terminals
1958                .iter()
1959                .map(|i| i.0 .0)
1960                .collect::<Vec<_>>(),
1961            &vec![2, 3, 1]
1962        );
1963    }
1964}