rustemo_compiler/table/
mod.rs

1//! Calculating LR tables
2use std::{
3    cell::RefCell,
4    cmp::{self, Ordering},
5    collections::{BTreeMap, BTreeSet, VecDeque},
6    fmt::{self, Display},
7    iter::{self, repeat},
8    ops::{Index, IndexMut},
9    slice::{Iter, IterMut},
10};
11
12use clap::ValueEnum;
13use itertools::{chain, Itertools};
14use rustemo::{log, LOG, LOG_BOLD};
15use yansi::Paint;
16
17use crate::{
18    create_index,
19    error::{Error, Result},
20    grammar::{Associativity, Priority, Terminal, DEFAULT_PRIORITY},
21    index::{
22        NonTermIndex, NonTermVec, ProdIndex, ProdVec, StateIndex, StateVec, SymbolIndex,
23        SymbolVec, TermIndex, TermVec,
24    },
25    lang::rustemo_actions::Recognizer,
26    settings::{ParserAlgo, Settings},
27};
28
29use super::grammar::{res_symbol, Grammar};
30
31#[derive(Debug, Clone)]
32pub enum Action {
33    Shift(StateIndex),
34    Reduce(ProdIndex, usize),
35    Accept,
36}
37
38/// Specifies the type of the parsing table used during parsing to decide about
39/// shift/reduce/goto operations.
40#[allow(non_camel_case_types)]
41#[derive(Debug, Default, Clone, PartialEq, Eq, ValueEnum)]
42pub enum TableType {
43    /// Lookahead LR tables. See
44    /// <http://publications.csail.mit.edu/lcs/pubs/pdf/MIT-LCS-TR-065.pdf>
45    LALR,
46    /// A slight modification of LALR tables to prevent some reduce/reduce
47    /// conflicts. Inspired by <https://doi.org/10.1007/BF00290336>
48    #[default]
49    LALR_PAGER,
50    /// LALR tables extended with right-nulled entries. Used for GLR parsing
51    /// (RNGLR). See <https://doi.org/10.1145/1146809.1146810>
52    LALR_RN,
53}
54
55type Firsts = BTreeSet<SymbolIndex>;
56type FirstSets = SymbolVec<Firsts>;
57
58create_index!(ItemIndex, ItemVec);
59
60/// LR State is a set of LR items and a dict of LR automata actions and gotos.
61#[derive(Clone)]
62pub struct LRState<'g> {
63    grammar: &'g Grammar,
64
65    /// The index of this state.
66    pub idx: StateIndex,
67
68    /// The grammar symbol related to this state. Intuitively, the grammar
69    /// symbol seen on a transition to this state. E.g. if the symbol is
70    /// terminal the parser did a Shift operation to enter this state, otherwise
71    /// it did reduce.
72    pub symbol: SymbolIndex,
73
74    /// LR(1) items used to construct this state.
75    items: ItemVec<LRItem>,
76
77    /// A terminal indexed vector of LR actions. Actions instruct LR parser to
78    /// Shift from the input, Reduce the top of the LR stack or accept the
79    /// input. For the deterministic parsing the internal vector of actions can
80    /// contain only one action.
81    pub actions: TermVec<Vec<Action>>,
82
83    /// A non-terminal indexed vector of LR GOTOs. GOTOs represent transitions
84    /// to another state after successful reduction of a non-terminal.
85    pub gotos: NonTermVec<Option<StateIndex>>,
86
87    /// Terminals and finish flags sorted by the priority and most specific
88    /// match (if enabled) for lexical disambiguation. The finish flag, if true,
89    /// indicates that if we already have terminals that matched at this
90    /// location no further termininals should be tried.
91    pub sorted_terminals: Vec<(TermIndex, bool)>,
92
93    /// Each production has a priority. We use this priority to resolve S/R and
94    /// R/R conflicts. Since the Shift operation is executed over terminal symbol
95    /// to resolve S/R we need terminal priority. But, the priority given for a
96    /// terminal directly is used in lexical disambiguation. Instead, we need
97    /// terminal priority inherited from productions. We, say that the priority
98    /// of terminals in S/R resolution will be the priority of the production
99    /// terminal is used in. But, since the same terminal can be used in many
100    /// production we will take the maximum for S/R resolution.
101    max_prior_for_term: BTreeMap<TermIndex, Priority>,
102}
103
104/// Two LR states are equal if they contain the same kernel items.
105impl PartialEq for LRState<'_> {
106    fn eq(&self, other: &Self) -> bool {
107        let self_ki = self.kernel_items();
108        let other_ki = other.kernel_items();
109        self_ki.len() == other_ki.len() && self_ki.iter().zip(other_ki.iter()).all(|(x, y)| x == y)
110    }
111}
112impl Eq for LRState<'_> {}
113
114impl Display for LRState<'_> {
115    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
116        write!(
117            f,
118            "{}\n\t{}",
119            format!(
120                "State {}:{}",
121                self.idx,
122                self.grammar.symbol_name(self.symbol)
123            )
124            .paint(LOG_BOLD),
125            self.items
126                .iter()
127                .map(|i| i.to_string(self.grammar))
128                .collect::<Vec<_>>()
129                .join("\n\t")
130        )
131    }
132}
133
134impl std::fmt::Debug for LRState<'_> {
135    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
136        f.debug_struct("LRState")
137            .field("idx", &self.idx)
138            .field("symbol", &self.symbol)
139            .field("items", &self.items)
140            .field("actions", &self.actions)
141            .field("gotos", &self.gotos)
142            .field("sorted_terminals", &self.sorted_terminals)
143            .field("max_prior_for_term", &self.max_prior_for_term)
144            .finish()
145    }
146}
147
148impl<'g> LRState<'g> {
149    fn new(grammar: &'g Grammar, index: StateIndex, symbol: SymbolIndex) -> Self {
150        Self {
151            grammar,
152            idx: index,
153            symbol,
154            items: ItemVec::new(),
155            actions: grammar.new_termvec(vec![]),
156            gotos: grammar.new_nontermvec(None),
157            max_prior_for_term: BTreeMap::new(),
158            sorted_terminals: Vec::new(),
159        }
160    }
161
162    fn new_with_items(
163        grammar: &'g Grammar,
164        index: StateIndex,
165        symbol: SymbolIndex,
166        items: ItemVec<LRItem>,
167    ) -> Self {
168        Self {
169            grammar,
170            idx: index,
171            symbol,
172            items,
173            actions: grammar.new_termvec(vec![]),
174            gotos: grammar.new_nontermvec(None),
175            max_prior_for_term: BTreeMap::new(),
176            sorted_terminals: Vec::new(),
177        }
178    }
179
180    fn add_item(mut self, item: LRItem) -> Self {
181        self.items.push(item);
182        self
183    }
184
185    fn kernel_items(&self) -> Vec<&LRItem> {
186        self.items.iter().filter(|i| i.is_kernel()).collect()
187    }
188
189    fn nonkernel_items(&self) -> Vec<&LRItem> {
190        self.items.iter().filter(|i| !i.is_kernel()).collect()
191    }
192
193    /// Closes over LR items of the LRState.
194    ///
195    /// Starting from the given items (usually just kernel items), for each
196    /// item, if right of the dot is a non-terminal, adds all items where LHS is
197    /// a given non-terminal and the dot is at the beginning. In other words,
198    /// adds all missing non-kernel items.
199    fn closure(&mut self, first_sets: &FirstSets, prod_rn_lengths: &Option<ProdVec<usize>>) {
200        loop {
201            // This is OK as Hash uses only non-interior-mutable part of the
202            // LRItem type.
203            #[allow(clippy::mutable_key_type)]
204            let mut new_items: BTreeSet<LRItem> = BTreeSet::new();
205
206            for item in &self.items {
207                if let Some(symbol) = item.symbol_at_position(self.grammar) {
208                    if self.grammar.is_nonterm(symbol) {
209                        let mut new_follow;
210                        // Find first set of substring that follow symbol at
211                        // position
212                        if item.position + 1 < self.grammar.productions[item.prod].rhs.len() {
213                            new_follow = firsts(
214                                self.grammar,
215                                first_sets,
216                                &self.grammar.production_rhs_symbols(item.prod)
217                                    [item.position + 1..],
218                            );
219                            // If symbols that follows the current nonterminal
220                            // can derive EMPTY add follows of current item.
221                            if new_follow.contains(&self.grammar.empty_index) {
222                                new_follow.remove(&self.grammar.empty_index);
223                                new_follow.extend(item.follow.borrow().iter());
224                            }
225                        } else {
226                            // If current item position is at the end add all of
227                            // its follow to the next item.
228                            new_follow = Follow::new();
229                            new_follow.extend(item.follow.borrow().iter());
230                        }
231
232                        // Get all productions of the current non-terminal and
233                        // create LR items with the calculated follow.
234                        let nonterm = self.grammar.symbol_to_nonterm_index(symbol);
235                        for prod in &self.grammar.nonterminals[nonterm].productions {
236                            new_items.insert(LRItem::with_follow(
237                                self.grammar,
238                                *prod,
239                                prod_rn_lengths.as_ref().map(|p| p[*prod]),
240                                new_follow.clone(),
241                            ));
242                        }
243                    }
244                }
245            }
246
247            // Add all new items to state.items. If item is already there update
248            // follow. If there is no change break from the loop.
249            let mut change = false;
250            for new_item in new_items {
251                match self.items.iter_mut().find(|x| *x == &new_item) {
252                    Some(item) => {
253                        // Item already exists, update follows
254                        let l = item.follow.borrow().len();
255                        item.follow
256                            .borrow_mut()
257                            .extend(new_item.follow.borrow().iter());
258                        if item.follow.borrow().len() > l {
259                            change = true;
260                        }
261                    }
262                    None => {
263                        self.items.push(new_item);
264                        change = true;
265                    }
266                }
267            }
268            if !change {
269                break;
270            }
271        }
272    }
273
274    /// Group LR items per grammar symbol right of the dot, and calculate
275    /// terminal max priorities.
276    fn group_per_next_symbol(&mut self) -> BTreeMap<SymbolIndex, Vec<ItemIndex>> {
277        let mut per_next_symbol: BTreeMap<SymbolIndex, Vec<ItemIndex>> = BTreeMap::new();
278
279        for (idx, item) in self.items.iter().enumerate() {
280            let symbol = item.symbol_at_position(self.grammar);
281            if let Some(symbol) = symbol {
282                per_next_symbol.entry(symbol).or_default().push(idx.into());
283                if self.grammar.is_term(symbol) {
284                    let symbol = self.grammar.symbol_to_term_index(symbol);
285                    let prod_prio = self.grammar.productions[item.prod].prio;
286                    self.max_prior_for_term
287                        .entry(symbol)
288                        .and_modify(|v| *v = cmp::max(*v, prod_prio))
289                        .or_insert(prod_prio);
290                }
291            }
292        }
293        per_next_symbol
294    }
295}
296
297/// Represents an item in the items set. Item is defined by a production and a
298/// position inside production (the dot). If the item is of LR_1 type follow set
299/// is also defined. Follow set is a set of terminals that can follow symbol at
300/// the given position in the given production.
301#[derive(Debug, Eq, Clone, PartialOrd, Ord)]
302struct LRItem {
303    prod: ProdIndex,
304
305    /// The length of the production
306    prod_len: usize,
307
308    /// Right-null production length - the last symbol in the production where
309    /// all the following symbols can reduce EMPTY.
310    /// Used in RN-GLR to decide when the item can reduce. `None` for LR.
311    rn_len: Option<usize>,
312
313    /// The position of "the dot" in the RHS of the production
314    position: usize,
315
316    follow: RefCell<Follow>,
317}
318
319impl std::hash::Hash for LRItem {
320    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
321        self.prod.hash(state);
322        self.position.hash(state);
323    }
324}
325
326impl PartialEq for LRItem {
327    fn eq(&self, other: &Self) -> bool {
328        self.prod == other.prod && self.position == other.position
329    }
330}
331
332/// LRItem is a production with a dot in the RHS.
333///
334/// Intuitively, the dot signifies the position where the parsing process is in
335/// a given state. The beginning position is 0, before the first symbol in a
336/// production RHS. The end position is len(RHS), after the last symbol in a
337/// RHS.
338///
339/// LRItem also keeps a set of follow terminals. The item is valid only if the
340/// production is followed by a terminal from the given follow set.
341///
342/// # Example
343///
344/// ```rust
345/// // If prod with index 5 is A: B a C;
346/// let item = LRItem::new(5)
347///                 .next_item().unwrap()
348///                 .next_item().unwrap();
349/// assert_eq(&item.position, 2)
350/// ```
351///
352/// ```text
353/// A: B a . C;
354///        ^
355///        |------ position is 2
356/// ```
357impl LRItem {
358    #[cfg(test)]
359    fn new(grammar: &Grammar, prod: ProdIndex, rn_len: Option<usize>) -> Self {
360        LRItem {
361            prod,
362            prod_len: grammar.production_len(prod),
363            rn_len,
364            position: 0,
365            follow: RefCell::new(Follow::new()),
366        }
367    }
368
369    fn with_follow(
370        grammar: &Grammar,
371        prod: ProdIndex,
372        rn_len: Option<usize>,
373        follow: Follow,
374    ) -> Self {
375        LRItem {
376            prod,
377            prod_len: grammar.production_len(prod),
378            rn_len,
379            position: 0,
380            follow: RefCell::new(follow),
381        }
382    }
383
384    fn symbol_at_position(&self, grammar: &Grammar) -> Option<SymbolIndex> {
385        Some(res_symbol(
386            grammar.productions.get(self.prod)?.rhs.get(self.position)?,
387        ))
388    }
389
390    /// Moves position to the right.
391    #[inline]
392    fn inc_position(mut self) -> Self {
393        assert!(self.position < self.prod_len);
394        self.position += 1;
395        self
396    }
397
398    /// True if this item belongs to the kernel core.
399    ///
400    /// Kernel core items are those where position is not 0 except the augmented
401    /// production which by definition belongs to the core.
402    #[inline]
403    fn is_kernel(&self) -> bool {
404        self.position > 0 || self.prod == ProdIndex(0)
405    }
406
407    #[inline]
408    fn is_reducing(&self) -> bool {
409        self.position == self.prod_len
410            || match self.rn_len {
411                Some(rn_len) => self.position >= rn_len,
412                None => false,
413            }
414    }
415
416    fn to_string(&self, grammar: &Grammar) -> String {
417        let prod = &grammar.productions[self.prod];
418        let mut rhs = prod
419            .rhs_symbols()
420            .iter()
421            .map(|s| grammar.symbol_name(*s))
422            .collect::<Vec<_>>();
423        rhs.insert(self.position, ".".into());
424        format!(
425            "{}: {}: {}    {{{}}}",
426            prod.idx.to_string().paint(LOG),
427            grammar
428                .symbol_name(grammar.nonterm_to_symbol_index(prod.nonterminal))
429                .paint(LOG),
430            rhs.join(" "),
431            self.follow
432                .borrow()
433                .iter()
434                .map(|f| grammar.symbol_name(*f))
435                .collect::<Vec<_>>()
436                .join(", ")
437        )
438    }
439}
440
441pub enum ConflictKind {
442    ShiftReduce(ProdIndex),
443    ReduceReduce(ProdIndex, ProdIndex),
444}
445
446pub struct Conflict<'g, 's> {
447    state: &'s LRState<'g>,
448    follow: TermIndex,
449    kind: ConflictKind,
450}
451
452impl Display for Conflict<'_, '_> {
453    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
454        write!(f, "")?;
455        Ok(())
456    }
457}
458
459pub struct LRTable<'g, 's> {
460    pub states: StateVec<LRState<'g>>,
461    pub layout_state: Option<StateIndex>,
462    grammar: &'g Grammar,
463    settings: &'s Settings,
464    first_sets: FirstSets,
465
466    /// Right-nulled length of productions. Used in RNGLR. RN length is a
467    /// position in a production after which all the remaining symbols can
468    /// derive EMPTY.
469    ///
470    /// E.g. if there is 3 symbols at the RHS and the last one can derive EMPTY
471    /// but others can't then its rn length is 2. Or, in other words, this
472    /// length is the minimal length of reduction for this production.
473    pub production_rn_lengths: Option<ProdVec<usize>>,
474}
475
476impl<'g, 's> LRTable<'g, 's> {
477    pub fn new(grammar: &'g Grammar, settings: &'s Settings) -> Result<Self> {
478        let first_sets = first_sets(grammar);
479        let production_rn_lengths = match settings.table_type {
480            TableType::LALR_RN => Some(production_rn_lengths(&first_sets, grammar)),
481            TableType::LALR | TableType::LALR_PAGER => None,
482        };
483        let mut table = Self {
484            grammar,
485            settings,
486            states: StateVec::new(),
487            layout_state: None,
488            first_sets,
489            production_rn_lengths,
490        };
491
492        table.check_empty_sets()?;
493
494        table.calc_states(grammar.augmented_index);
495        if let Some(augmented_layout_index) = grammar.augmented_layout_index {
496            table.layout_state = Some(StateIndex(table.states.len()));
497            table.calc_states(augmented_layout_index)
498        }
499
500        log!("LR states constructed. Updating follows.");
501        table.propagate_follows();
502
503        log!(
504            "Calculate REDUCTION entries in ACTION tables and resolve \
505            possible conflicts."
506        );
507        table.calculate_reductions();
508
509        log!("Sort terminals for lexical disambiguation");
510        table.sort_terminals();
511
512        println!("Terminals: {}", grammar.terminals.len());
513        println!("Non-terminals: {}", grammar.nonterminals().len());
514        println!("Productions: {}", grammar.productions().len());
515        println!("States: {}", table.states.len());
516
517        if settings.print_table {
518            println!("LR TABLE:");
519            println!("{table}");
520        }
521
522        Ok(table)
523    }
524
525    /// Calculate LR states with GOTOs and ACTIONs for the given Grammar.
526    ///
527    /// This collection of states is used to generate LR/GLR parser tables.
528    fn calc_states(&mut self, start_symbol: SymbolIndex) {
529        let mut current_state_idx = self.states.len();
530
531        let prods = &self.grammar.symbol_to_nonterm(start_symbol).productions;
532        assert_eq!(prods.len(), 1);
533
534        // Create a state for the first production (augmented)
535        let state = LRState::new(self.grammar, StateIndex(current_state_idx), start_symbol)
536            .add_item(LRItem::with_follow(
537                self.grammar,
538                prods[0],
539                self.production_rn_lengths.as_ref().map(|p| p[prods[0]]),
540                Follow::from([self.grammar.stop_index]),
541            ));
542        current_state_idx += 1;
543
544        // States to be processed.
545        let mut state_queue = VecDeque::from([state]);
546
547        log!("Calculating LR automaton states.");
548        while let Some(mut state) = state_queue.pop_front() {
549            // For each state calculate its closure first, i.e. starting from a so
550            // called "kernel items" expand collection with non-kernel items. We
551            // will also calculate GOTO and ACTIONS dicts for each state. These
552            // dicts will be keyed by a grammar symbol.
553            state.closure(&self.first_sets, &self.production_rn_lengths);
554
555            // To find out other states we examine following grammar symbols in the
556            // current state (symbols following current position/"dot") and group
557            // all items by a grammar symbol.
558            let per_next_symbol = state.group_per_next_symbol();
559
560            // Create accept action if possible.
561            for &symbol in per_next_symbol.keys() {
562                if symbol == self.grammar.stop_index {
563                    state.actions[self.grammar.symbol_to_term_index(symbol)] =
564                        vec![Action::Accept];
565                    break;
566                }
567            }
568
569            // Create new states reachable from the current state.
570            let new_states = Self::create_new_states(self.grammar, &state, per_next_symbol);
571
572            // Find states that already exist and try to merge. If not possible
573            // to merge or not found push state to state queue.
574            for mut new_state in new_states {
575                let target_state_symbol = new_state.symbol;
576                let mut new_state_found = true;
577                let mut target_state_idx = StateIndex(current_state_idx);
578                for old_state in self
579                    .states
580                    .iter_mut()
581                    .chain(state_queue.iter_mut())
582                    .chain(iter::once(&mut state))
583                    .filter(|x| **x == new_state)
584                {
585                    // If the same state already exists try to merge.
586                    if Self::merge_state(self.settings, old_state, &new_state) {
587                        new_state_found = false;
588                        target_state_idx = old_state.idx;
589                        break;
590                    }
591                }
592
593                // Create GOTO for non-terminal or Shift Action for terminal.
594                if self.grammar.is_nonterm(target_state_symbol) {
595                    state.gotos[self.grammar.symbol_to_nonterm_index(target_state_symbol)] =
596                        Some(target_state_idx);
597                } else {
598                    let term = self.grammar.symbol_to_term_index(new_state.symbol);
599                    state.actions[term].push(Action::Shift(target_state_idx));
600                }
601
602                if new_state_found {
603                    // Merge is not possible. Create new state.
604                    new_state.idx = StateIndex(current_state_idx);
605
606                    state_queue.push_back(new_state);
607                    current_state_idx += 1;
608                }
609            }
610
611            self.states.push(state);
612        }
613    }
614
615    /// Try to merge new_state to old_state if possible. If not possible return
616    /// false.
617    ///
618    /// If old state has no R/R conflicts additional check is made and merging is
619    /// not done if it would add R/R conflict.
620    fn merge_state(
621        settings: &Settings,
622        old_state: &mut LRState<'g>,
623        new_state: &LRState<'g>,
624    ) -> bool {
625        // States with different kernel sets cannot be merged.
626        if old_state != new_state {
627            return false;
628        }
629
630        let old_state_items = old_state
631            .items
632            .clone()
633            .into_iter()
634            .filter(|item| item.is_kernel());
635
636        // Item pairs of item from an old state and corresponding item from the new state.
637        let item_pairs: Vec<(&mut LRItem, &LRItem)> = iter::zip(
638            old_state.items.iter_mut().filter(|item| item.is_kernel()),
639            old_state_items.map(|x| new_state.items.iter().find(|&i| *i == x).unwrap()),
640        )
641        .collect();
642
643        if settings.table_type != TableType::LALR {
644            // If this is not pure LALR check to see if merging would introduce R/R.
645            // In case it would, do not merge but keep these states split.
646            //
647            // Menhir's week compatibility test (variant of Pager's weak comp. test).
648            // See Definition 2.29 from the paper:
649            // Denny, Joel E., and Brian A. Malloy. "The IELR (1) algorithm for generating
650            // minimal LR (1) parser tables for non-LR (1) grammars with conflict
651            // resolution." Science of Computer Programming 75.11 (2010): 943-979.
652            for (old, new) in &item_pairs {
653                if !old.is_reducing() {
654                    continue;
655                }
656                for (old_in, new_in) in &item_pairs {
657                    if old == old_in {
658                        continue;
659                    }
660                    // ∀t, at least one of the following is true:
661                    // (a) t ∈ {K1 ∩ K2' } ∧ t ∈ {K2 ∩ K1' }.
662                    // (b) t ∈ {K1 ∩ K2 }.
663                    // (c) t ∈ {K1' ∩ K2' }.
664                    for t in chain(
665                        old.follow.borrow().intersection(&*new_in.follow.borrow()),
666                        old_in.follow.borrow().intersection(&*new.follow.borrow()),
667                    ) {
668                        // Test (b) and (c) conditions
669                        if old
670                            .follow
671                            .borrow()
672                            .intersection(&*old_in.follow.borrow())
673                            .all(|x| x != t)
674                            && new
675                                .follow
676                                .borrow()
677                                .intersection(&*new_in.follow.borrow())
678                                .all(|x| x != t)
679                        {
680                            // We have found terminal which would introduce RR
681                            // conflict if states are merged.
682                            return false;
683                        }
684                    }
685                }
686            }
687        }
688
689        // Do the merge by updating old items follow sets.
690        for (old, new) in item_pairs {
691            old.follow.borrow_mut().extend(new.follow.borrow().iter())
692        }
693        true
694    }
695
696    /// Propagate LR items follows.
697    ///
698    /// This is needed due to state merging. Whenever merge occurs, target state
699    /// follows might get updated so we have to propagate those changes to other
700    /// states.
701    fn propagate_follows(&mut self) {
702        let mut changed = true;
703        while changed {
704            changed = false;
705            for state in self.states.iter_mut() {
706                // Refresh closure to propagate follows from kernel items to
707                // non-kernel of the same state as the merge is done only for kernel
708                // items.
709                state.closure(&self.first_sets, &self.production_rn_lengths);
710            }
711
712            for state in self.states.iter() {
713                // Use GOTOs and ACTIONS to propagate follows between states.
714                state
715                    .gotos
716                    .iter()
717                    .filter_map(|x| x.as_ref())
718                    .chain(state.actions.iter().flat_map(|x| {
719                        x.iter().filter_map(|a| match a {
720                            Action::Shift(state) => Some(state),
721                            _ => None,
722                        })
723                    }))
724                    .for_each(|&target_state| {
725                        for target_item in &mut self.states[target_state]
726                            .items
727                            .iter()
728                            .filter(|x| x.is_kernel())
729                        {
730                            // Find corresponding item in state
731                            if let Some(source_item) = state.items.iter().find(|&x| {
732                                x.prod == target_item.prod
733                                    && x.position == target_item.position - 1
734                            }) {
735                                // Update follow of target item with item from state
736                                let follow_len = target_item.follow.borrow().len();
737                                target_item
738                                    .follow
739                                    .borrow_mut()
740                                    .extend(source_item.follow.borrow().iter());
741
742                                // if target item follow was changed set changed to true
743                                if target_item.follow.borrow().len() > follow_len {
744                                    changed = true
745                                }
746                            }
747                        }
748                    })
749            }
750        }
751    }
752
753    /// Calculate reductions entries in action tables and resolve possible
754    /// conflicts.
755    fn calculate_reductions(&mut self) {
756        let mut aug_symbols = vec![self.grammar.augmented_index];
757        if let Some(layout_index) = self.grammar.augmented_layout_index {
758            aug_symbols.push(layout_index);
759        }
760        for state in &mut self.states {
761            for item in state.items.iter().filter(|x| x.is_reducing()) {
762                let prod = &self.grammar.productions[item.prod];
763
764                // Accept if reducing by augmented productions for STOP
765                // lookahead but do not take into account RN reductions as the
766                // whole AUG production may be right nullable which would
767                // produce an Accept action from the initial state which would
768                // be in conflict with empty reduction.
769                if aug_symbols.contains(&self.grammar.nonterm_to_symbol_index(prod.nonterminal)) {
770                    if item.position == item.prod_len {
771                        let actions = &mut state.actions[TermIndex(0)];
772                        actions.push(Action::Accept);
773                    }
774                    continue;
775                }
776
777                let new_reduce = Action::Reduce(item.prod, item.position);
778                for follow_symbol in item.follow.borrow().iter() {
779                    let follow_term = self.grammar.symbol_to_term(*follow_symbol);
780                    let actions = &mut state.actions[follow_term.idx];
781                    if actions.is_empty() {
782                        // No other action are possible for this follow terminal.
783                        // Just register this reduction.
784                        actions.push(new_reduce.clone());
785                    } else {
786                        // Conflict. Try to resolve.
787                        let (shifts, reduces): (Vec<_>, Vec<_>) = actions
788                            .clone()
789                            .into_iter()
790                            .partition(|x| matches!(x, Action::Shift(_) | Action::Accept));
791                        // Only one SHIFT or ACCEPT might exists for a single
792                        // terminal but many REDUCEs might exist.
793                        assert!(shifts.len() <= 1);
794
795                        let mut should_reduce = true;
796                        if let Some(shift) = shifts.first() {
797                            // Shift/Reduce conflict. Use assoc and priority to
798                            // resolve. For disambiguation treat ACCEPT action the
799                            // same as SHIFT.
800                            let shift_prio = match shift {
801                                Action::Accept => DEFAULT_PRIORITY,
802                                _ => state.max_prior_for_term[&follow_term.idx],
803                            };
804                            match prod.prio.cmp(&shift_prio) {
805                                Ordering::Less => {
806                                    // If priority of existing SHIFT action is
807                                    // higher then leave it instead
808                                    should_reduce = false
809                                }
810                                Ordering::Equal => {
811                                    // If priorities are the same use associativity
812                                    // Terminals associativity has priority over
813                                    // production associativity
814                                    match (&prod.assoc, &follow_term.assoc) {
815                                        (Associativity::Left, Associativity::None)
816                                        | (_, Associativity::Right) => {
817                                            // Override SHIFT with this REDUCE
818                                            assert!(actions.len() == 1);
819                                            actions.pop();
820                                        }
821                                        (Associativity::Right, Associativity::None)
822                                        | (_, Associativity::Left) => {
823                                            // If associativity is right leave SHIFT
824                                            // action as "stronger" and don't consider
825                                            // this reduction any more. Right
826                                            // associative reductions can't be in the
827                                            // same set of actions together with SHIFTs.
828                                            should_reduce = false;
829                                        }
830                                        (Associativity::None, Associativity::None) => {
831                                            // If priorities are the same and no
832                                            // associativity defined use preferred
833                                            // strategy.
834                                            let empty = prod.rhs.is_empty();
835                                            let prod_pse = empty
836                                                && self.settings.prefer_shifts_over_empty
837                                                && !prod.nopse;
838                                            let prod_ps = !empty
839                                                && self.settings.prefer_shifts
840                                                && !prod.nops;
841                                            should_reduce = !(prod_pse || prod_ps);
842                                        }
843                                    }
844                                }
845                                Ordering::Greater => {
846                                    // This item operation priority is higher =>
847                                    // override with reduce
848                                    assert!(actions.len() == 1);
849                                    actions.pop();
850                                }
851                            }
852                        }
853
854                        if should_reduce {
855                            if reduces.is_empty() {
856                                actions.push(new_reduce.clone())
857                            } else {
858                                // REDUCE/REDUCE conflicts.
859                                // Try to resolve using priorities.
860                                let reduces_prio = reduces
861                                    .iter()
862                                    .map(|x| match x {
863                                        Action::Reduce(prod, ..) => {
864                                            self.grammar.productions[*prod].prio
865                                        }
866                                        other => panic!("This should not happen. Got {other:?}"),
867                                    })
868                                    .collect::<Vec<_>>();
869                                if reduces_prio.iter().all(|x| prod.prio < *x) {
870                                    // Current product priority is less than all
871                                    // other reductions. Do not add this reduction.
872                                } else if reduces_prio.iter().all(|x| prod.prio > *x) {
873                                    // Current product priority is greater than all
874                                    // other reductions. This reduction should
875                                    // replace all others.
876                                    actions.retain(|x| !matches!(x, Action::Reduce(..)));
877                                    actions.push(new_reduce.clone())
878                                } else {
879                                    // For LR parsing non-empty reductions are
880                                    // preferred over empty...
881                                    if let ParserAlgo::LR = self.settings.parser_algo {
882                                        // ... so remove all empty reductions.
883                                        actions.retain(
884                                            |x| !matches!(x, Action::Reduce(_, len) if *len == 0),
885                                        );
886
887                                        if item.prod_len > 0 || actions.is_empty() {
888                                            // If current reduction is non-empty add it.
889                                            actions.push(new_reduce.clone())
890                                        }
891                                    } else {
892                                        // This R/R conflict can't be resolved.
893                                        // Just add the new reduction and GLR
894                                        // will handle it by investigating all
895                                        // possibilities.
896                                        actions.push(new_reduce.clone())
897                                    }
898                                }
899                            }
900                        }
901                    }
902                }
903            }
904        }
905    }
906
907    /// Sort terminals for each state according to explicit priority and
908    /// terminal recognizer type. String recognizers have precedence over regex
909    /// recognizers if most specific match strategy is enabled. For most
910    /// specific match, longer string recognizers have precedence over shorter.
911    fn sort_terminals(&mut self) {
912        for state in &mut self.states {
913            let mut terminals = state
914                .actions
915                .iter()
916                .enumerate()
917                .filter(|(_, actions)| !actions.is_empty())
918                .map(|(idx, _)| self.grammar.term_by_index(TermIndex(idx)))
919                .collect::<Vec<_>>();
920
921            let term_prio = |term: &Terminal| -> u32 {
922                term.prio * 1000
923                    + if self.settings.lexical_disamb_most_specific {
924                        match &term.recognizer {
925                            Some(recognizer) => {
926                                (match recognizer {
927                                    Recognizer::StrConst(str_rec) => str_rec.as_ref().len(),
928                                    Recognizer::RegexTerm(_) => 0,
929                                }) as u32
930                            }
931                            None => 0,
932                        }
933                    } else {
934                        0
935                    }
936            };
937            terminals.sort_by(|&l, &r| {
938                let l_term_prio = term_prio(l);
939                let r_term_prio = term_prio(r);
940                r_term_prio.cmp(&l_term_prio)
941            });
942
943            // Calculate "finish" flags
944            let mut sorted_terminals: Vec<(TermIndex, bool)> = vec![];
945            let mut last_prio = None;
946            for terminal in terminals {
947                let finish = self.settings.lexical_disamb_most_specific
948                    && matches!(terminal.recognizer, Some(Recognizer::StrConst(_)));
949                let last_finish = last_prio.is_some_and(|prio| terminal.prio != prio);
950                last_prio = Some(terminal.prio);
951                if let Some(t) = sorted_terminals.last_mut() {
952                    t.1 |= last_finish
953                }
954                sorted_terminals.push((terminal.idx, finish));
955            }
956
957            state.sorted_terminals = sorted_terminals;
958        }
959    }
960
961    /// Create new states that can be reached from the given state.
962    fn create_new_states(
963        grammar: &'g Grammar,
964        state: &LRState,
965        per_next_symbol: BTreeMap<SymbolIndex, Vec<ItemIndex>>,
966    ) -> Vec<LRState<'g>> {
967        let mut states = Vec::new();
968        for (symbol, items) in per_next_symbol {
969            let next_state_items = items
970                .into_iter()
971                .map(|i| state.items[i].clone().inc_position())
972                .collect();
973            states.push(LRState::new_with_items(
974                grammar,
975                StateIndex(0), // Temporary value. The caller will set the real index.
976                symbol,
977                next_state_items,
978            ));
979        }
980        states
981    }
982
983    /// Check for states with GOTO links but without SHIFT links.
984    ///
985    /// This is invalid as GOTO links will never be traversed.
986    fn check_empty_sets(&self) -> Result<()> {
987        if let Some((idx, _)) = self
988            .first_sets
989            .iter()
990            .enumerate()
991            .find(|(_, s)| s.is_empty())
992        {
993            return Err(Error::Error(format!(
994                "First set empty for grammar symbol {:?}.\n\
995                 An infinite recursion on the grammar symbol.",
996                &self.grammar.symbol_name(SymbolIndex(idx))
997            )));
998        }
999        Ok(())
1000    }
1001
1002    pub fn get_conflicts(&'s self) -> Vec<Conflict<'g, 's>> {
1003        self.states
1004            .iter()
1005            .flat_map(|state| {
1006                state
1007                    .actions
1008                    .iter()
1009                    .enumerate()
1010                    .filter(|(_, actions)| actions.len() > 1)
1011                    .flat_map(|(idx, actions)| {
1012                        actions
1013                            .iter()
1014                            .combinations(2)
1015                            .map(move |conflict| (idx, conflict))
1016                    })
1017                    .map(|(term_index, conflict)| {
1018                        let kind = match &conflict[..] {
1019                            [Action::Shift(_), Action::Reduce(prod, _)]
1020                            | [Action::Reduce(prod, _), Action::Shift(_)] => {
1021                                ConflictKind::ShiftReduce(*prod)
1022                            }
1023                            [Action::Reduce(prod1, _), Action::Reduce(prod2, _)] => {
1024                                ConflictKind::ReduceReduce(*prod1, *prod2)
1025                            }
1026                            e => {
1027                                // This cannot happen as we have combinations of size 2.
1028                                eprint!("{e:?}");
1029                                unreachable!()
1030                            }
1031                        };
1032                        Conflict {
1033                            state,
1034                            follow: TermIndex(term_index),
1035                            kind,
1036                        }
1037                    })
1038            })
1039            .collect()
1040    }
1041
1042    pub fn print_conflicts_report(&self, conflicts: &Vec<Conflict<'g, 's>>) {
1043        for conflict in conflicts {
1044            println!("{} {}", "In".paint(LOG_BOLD), conflict.state);
1045            print!(
1046                "When I saw {} and see token {} ahead I can't decide",
1047                self.grammar.symbol_name(conflict.state.symbol).paint(LOG),
1048                self.grammar
1049                    .symbol_name(self.grammar.term_to_symbol_index(conflict.follow))
1050                    .paint(LOG)
1051            );
1052            match conflict.kind {
1053                ConflictKind::ShiftReduce(prod) => {
1054                    println!(
1055                        " should I shift or reduce by production:\n{}\n",
1056                        self.grammar.productions[prod]
1057                            .to_string(self.grammar)
1058                            .paint(LOG)
1059                    );
1060                }
1061                ConflictKind::ReduceReduce(prod1, prod2) => {
1062                    println!(
1063                        " should I reduce by production:\n{}\nor production:\n{}\n",
1064                        self.grammar.productions[prod1]
1065                            .to_string(self.grammar)
1066                            .paint(LOG),
1067                        self.grammar.productions[prod2]
1068                            .to_string(self.grammar)
1069                            .paint(LOG)
1070                    );
1071                }
1072            }
1073        }
1074        let shift_reduce_len = conflicts
1075            .iter()
1076            .filter(|c| matches!(c.kind, ConflictKind::ShiftReduce(..)))
1077            .count();
1078        let reduce_reduce_len = conflicts
1079            .iter()
1080            .filter(|c| matches!(c.kind, ConflictKind::ReduceReduce(..)))
1081            .count();
1082        println!(
1083            "{}",
1084            format!(
1085                "{} conflict(s). {} Shift/Reduce and {} Reduce/Reduce.",
1086                shift_reduce_len + reduce_reduce_len,
1087                shift_reduce_len,
1088                reduce_reduce_len
1089            )
1090            .paint(LOG)
1091        );
1092    }
1093
1094    /// Maximal number of actions per state/token. For LR can't be >1.
1095    #[inline]
1096    pub fn max_actions(&self) -> usize {
1097        self.states
1098            .iter()
1099            .map(|state| state.actions.iter().map(|a| a.len()).max().unwrap_or(0))
1100            .max()
1101            .unwrap()
1102    }
1103
1104    #[inline]
1105    pub fn max_recognizers(&self) -> usize {
1106        self.states
1107            .iter()
1108            .map(|state| state.actions.iter().filter(|a| !a.is_empty()).count())
1109            .max()
1110            .unwrap()
1111    }
1112
1113    pub fn to_dot(&self) -> String {
1114        let mut dot = String::from(
1115            r#"
1116            digraph grammar {
1117            rankdir=LR
1118            fontname = "Bitstream Vera Sans"
1119            fontsize = 8
1120            node[
1121                shape=record,
1122                style=filled,
1123                fillcolor=aliceblue
1124            ]
1125            nodesep = 0.3
1126            edge[dir=black,arrowtail=empty]
1127
1128        "#,
1129        );
1130
1131        let dot_escape = |s: &String| {
1132            s.replace('\n', r"\n")
1133                .replace('\\', "\\\\")
1134                .replace('"', r#"\""#)
1135                .replace('|', r"\|")
1136                .replace('{', r"\{")
1137                .replace('}', r"\}")
1138                .replace('>', r"\>")
1139                .replace('<', r"\<")
1140                .replace('?', r"\?")
1141        };
1142
1143        yansi::disable();
1144
1145        let conflicts = self.get_conflicts();
1146        let is_conflict_state = |state: &LRState| conflicts.iter().any(|s| s.state == state);
1147
1148        for state in &self.states {
1149            let mut kernel_items_str = String::new();
1150            for item in state.kernel_items() {
1151                kernel_items_str += &format!("{}\\l", dot_escape(&item.to_string(self.grammar)));
1152            }
1153
1154            let nonkernel_items = state.nonkernel_items();
1155            let mut nonkernel_items_str = if !nonkernel_items.is_empty() {
1156                String::from("|")
1157            } else {
1158                String::new()
1159            };
1160
1161            for item in nonkernel_items {
1162                nonkernel_items_str +=
1163                    &format!("{}\\l", dot_escape(&item.to_string(self.grammar)));
1164            }
1165
1166            let mut reductions: Vec<String> = vec![];
1167            for term in &self.grammar.terminals {
1168                let mut term_reduction_prods: Vec<String> = vec![];
1169                for action in &state.actions[term.idx] {
1170                    match action {
1171                        Action::Shift(target_state_idx) => {
1172                            dot += &format!(
1173                                "{} -> {} [label=\"SHIFT:{}\"]\n",
1174                                state.idx, target_state_idx, term.name
1175                            )
1176                        }
1177                        Action::Reduce(prod_idx, len) => {
1178                            term_reduction_prods.push(format!("({prod_idx},{len})"));
1179                        }
1180                        Action::Accept => {
1181                            dot += &format!("{} -> ACCEPT [label=\"{}\"]\n", state.idx, term.name)
1182                        }
1183                    }
1184                }
1185                if !term_reduction_prods.is_empty() {
1186                    let r = term_reduction_prods.join(", ");
1187                    reductions.push(if term_reduction_prods.len() > 1 {
1188                        format!("{}:[{}]", dot_escape(&term.name), r)
1189                    } else {
1190                        format!("{}:{}", dot_escape(&term.name), r)
1191                    });
1192                }
1193            }
1194            let reductions = if !reductions.is_empty() {
1195                format!("|Reductions:\\l{}", reductions.join(", "))
1196            } else {
1197                String::new()
1198            };
1199
1200            dot += &format!(
1201                "{} [label=\"{}|{}{}{}\"{}]\n",
1202                state.idx,
1203                dot_escape(&format!(
1204                    "{}:{}",
1205                    state.idx,
1206                    self.grammar.symbol_name(state.symbol)
1207                )),
1208                kernel_items_str,
1209                nonkernel_items_str,
1210                reductions,
1211                if is_conflict_state(state) {
1212                    ", fillcolor=\"lightpink\""
1213                } else {
1214                    ""
1215                }
1216            );
1217
1218            // GOTOs
1219            for nonterm in &self.grammar.nonterminals {
1220                if let Some(target_state_idx) = state.gotos[nonterm.idx] {
1221                    dot += &format!(
1222                        "{} -> {} [label=\"GOTO:{}\"]\n",
1223                        state.idx, target_state_idx, nonterm.name
1224                    )
1225                }
1226            }
1227        }
1228
1229        yansi::enable();
1230
1231        dot += "\n}\n";
1232        dot
1233    }
1234}
1235
1236fn production_rn_lengths(
1237    first_sets: &SymbolVec<BTreeSet<SymbolIndex>>,
1238    grammar: &Grammar,
1239) -> ProdVec<usize> {
1240    let mut prod_rn_lens = ProdVec::new();
1241    for production in &grammar.productions {
1242        let mut rn_len = production.rhs.len();
1243        for symbol in production.rhs_symbols().iter().rev() {
1244            if first_sets[*symbol].contains(&grammar.empty_index) {
1245                rn_len -= 1;
1246            } else {
1247                break;
1248            }
1249        }
1250        prod_rn_lens.push(rn_len)
1251    }
1252    prod_rn_lens
1253}
1254
1255impl Display for LRTable<'_, '_> {
1256    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1257        for state in &self.states {
1258            writeln!(
1259                f,
1260                "\n{}",
1261                format!(
1262                    "State {}:{}",
1263                    state.idx,
1264                    self.grammar.symbol_name(state.symbol)
1265                )
1266                .paint(LOG),
1267            )?;
1268            for item in &state.items {
1269                writeln!(f, "\t{}", item.to_string(self.grammar))?;
1270            }
1271            let actions = state
1272                .actions
1273                .iter()
1274                .enumerate()
1275                .filter(|(_, a)| !a.is_empty())
1276                .flat_map(|(i, a)| repeat(i).zip(a.iter()))
1277                .map(|(i, a)| {
1278                    (
1279                        self.grammar.terminals[TermIndex(i)].name.clone(),
1280                        match a {
1281                            Action::Shift(s) => format!("Shift to {s}"),
1282                            Action::Reduce(p, l) => {
1283                                format!(
1284                                    "Reduce for len {l} by:   {}",
1285                                    self.grammar.productions[*p].to_string(self.grammar)
1286                                )
1287                            }
1288                            Action::Accept => "Accept".into(),
1289                        },
1290                    )
1291                })
1292                .collect::<Vec<_>>();
1293
1294            if !actions.is_empty() {
1295                writeln!(f, "{}", "\tACTIONS:".paint(LOG))?;
1296                for (term, action) in actions {
1297                    writeln!(f, "\t\t{term} {} {action}", "=>".paint(LOG))?;
1298                }
1299            }
1300
1301            // GOTOs
1302            let gotos = state
1303                .gotos
1304                .iter()
1305                .enumerate()
1306                .filter(|(_, g)| g.is_some())
1307                .map(|(idx, g)| {
1308                    let g = g.unwrap();
1309                    (
1310                        self.grammar.nonterminals[NonTermIndex(idx)].name.clone(),
1311                        format!(
1312                            "State {}:{}",
1313                            self.grammar.symbol_name(self.states[g].symbol),
1314                            g.0
1315                        ),
1316                    )
1317                })
1318                .collect::<Vec<_>>();
1319
1320            if !gotos.is_empty() {
1321                writeln!(f, "{}", "\tGOTOs:".paint(LOG))?;
1322                for (nonterm, goto) in gotos {
1323                    writeln!(f, "\t\t{nonterm} {} {goto}", "=>".paint(LOG))?;
1324                }
1325            }
1326        }
1327        Ok(())
1328    }
1329}
1330
1331/// Calculates the sets of terminals that can start the sentence derived from all
1332/// grammar symbols.
1333///
1334/// The Dragon book p. 221.
1335fn first_sets(grammar: &Grammar) -> FirstSets {
1336    let mut first_sets = SymbolVec::new();
1337
1338    // First set for each terminal contains only the terminal itself.
1339    for terminal in &grammar.terminals {
1340        let mut new_set = Firsts::new();
1341        new_set.insert(terminal.idx.symbol_index());
1342        first_sets.push(new_set);
1343    }
1344
1345    // Initialize empty sets for nonterminals
1346    grammar
1347        .nonterminals
1348        .iter()
1349        .for_each(|_| first_sets.push(Firsts::new()));
1350
1351    // EMPTY derives EMPTY
1352    first_sets[grammar.empty_index].insert(grammar.empty_index);
1353
1354    let mut additions = true;
1355    while additions {
1356        additions = false;
1357        for production in &grammar.productions {
1358            let lhs_nonterm = grammar.nonterm_to_symbol_index(production.nonterminal);
1359
1360            let rhs_firsts = firsts(grammar, &first_sets, &production.rhs_symbols());
1361
1362            let lhs_len = first_sets[lhs_nonterm].len();
1363
1364            first_sets[lhs_nonterm].extend(rhs_firsts);
1365
1366            // Check if any addition is actually performed.
1367            if lhs_len < first_sets[lhs_nonterm].len() {
1368                additions = true
1369            }
1370        }
1371    }
1372    first_sets
1373}
1374
1375/// For the given sequence of symbols finds a set of FIRST terminals.
1376///
1377/// Finds all terminals which can start the given sequence of symbols. Note that
1378/// if all symbols in the sequence can derive EMPTY, EMPTY will be a part of the
1379/// returned set.
1380fn firsts(grammar: &Grammar, first_sets: &FirstSets, symbols: &[SymbolIndex]) -> Firsts {
1381    let mut firsts = Firsts::new();
1382    let mut break_out = false;
1383    for &symbol in symbols {
1384        let symbol_firsts = &first_sets[symbol];
1385        let mut empty = false;
1386
1387        for first in symbol_firsts {
1388            if *first == grammar.empty_index {
1389                empty = true;
1390            } else {
1391                firsts.insert(*first);
1392            }
1393        }
1394
1395        // We should proceed to the next symbol in sequence only if the current
1396        // symbol can produce EMPTY.
1397        if !empty {
1398            break_out = true;
1399            break;
1400        }
1401    }
1402    if !break_out {
1403        // If we reached the end of symbol sequence and each symbol along the
1404        // way could derive EMPTY than we must add EMPTY to the firsts.
1405        firsts.insert(grammar.empty_index);
1406    }
1407    firsts
1408}
1409
1410/// Calculates the sets of terminals that can follow some non-terminal for the
1411/// given grammar.
1412///
1413/// The dragon book p.221
1414/// Currently unused
1415type Follow = BTreeSet<SymbolIndex>;
1416#[allow(dead_code)]
1417type FollowSets = SymbolVec<Follow>;
1418#[cfg(test)]
1419fn follow_sets(grammar: &Grammar, first_sets: &FirstSets) -> FollowSets {
1420    let mut follow_sets = FollowSets::new();
1421    for _ in 0..first_sets.len() {
1422        follow_sets.push(Follow::new());
1423    }
1424
1425    // Rule 1: Place $ in FOLLOW(S), where S is the start symbol, and $ is
1426    // the input right endmarker.
1427    follow_sets[grammar.augmented_index].insert(grammar.stop_index);
1428
1429    let mut additions = true;
1430    while additions {
1431        additions = false;
1432        for production in &grammar.productions {
1433            let lhs_symbol = grammar.nonterm_to_symbol_index(production.nonterminal);
1434
1435            // Rule 2: If there is a production A -> α B β then everything in
1436            // FIRST(β) except EMPTY is in FOLLOW(B).
1437            for idx in 0..production.rhs.len() {
1438                let rhs_symbol = production.rhs_symbol(idx);
1439                let elements = follow_sets[rhs_symbol].len();
1440                let mut break_out = false;
1441                for rnext in &production.rhs[idx + 1..] {
1442                    let follow_symbols = &first_sets[res_symbol(rnext)];
1443
1444                    follow_sets[rhs_symbol]
1445                        .extend(follow_symbols.iter().filter(|&&s| s != grammar.empty_index));
1446
1447                    if !follow_symbols.contains(&grammar.empty_index) {
1448                        break_out = true;
1449                        break;
1450                    }
1451                }
1452
1453                if !break_out {
1454                    // Rule 3: If all symbols right of current RHS produce EMPTY
1455                    // then this RHS symbol must contain all what follows symbol
1456                    // at LHS.
1457                    let lhs_follows: Follow = follow_sets[lhs_symbol].iter().copied().collect();
1458                    follow_sets[rhs_symbol].extend(lhs_follows.iter());
1459                }
1460
1461                if follow_sets[rhs_symbol].len() > elements {
1462                    additions = true
1463                }
1464            }
1465        }
1466    }
1467    follow_sets
1468}
1469
1470#[cfg(test)]
1471mod tests {
1472
1473    use std::cell::RefCell;
1474    use std::collections::BTreeSet;
1475
1476    use crate::index::{ProdIndex, StateIndex, SymbolIndex};
1477    use crate::table::{first_sets, follow_sets, ItemIndex, LRTable, TableType};
1478    use crate::{
1479        grammar::Grammar,
1480        output_cmp,
1481        settings::Settings,
1482        table::{Follow, LRItem},
1483    };
1484
1485    use super::{production_rn_lengths, LRState};
1486
1487    fn follow<T, I>(indexes: T) -> BTreeSet<SymbolIndex>
1488    where
1489        T: IntoIterator<Item = I>,
1490        I: Into<SymbolIndex>,
1491    {
1492        indexes.into_iter().map(|i| i.into()).collect()
1493    }
1494
1495    fn test_grammar() -> Grammar {
1496        r#"
1497        E: T Ep;
1498        Ep: "+" T Ep | EMPTY;
1499        T: F Tp;
1500        Tp: "*" F Tp | EMPTY;
1501        F: "(" E ")" | "id";
1502
1503        terminals
1504        Plus: "+";
1505        Mul: "*";
1506        LParen: "(";
1507        RParen: ")";
1508        id: "id";
1509        "#
1510        .parse()
1511        .unwrap()
1512    }
1513
1514    fn test_grammar_2() -> Grammar {
1515        r#"
1516        E: E "+" T | T;
1517        T: T "*" F | F;
1518        F: "(" E ")" | "id";
1519
1520        terminals
1521        Plus: "+";
1522        Mul: "*";
1523        LParen: "(";
1524        RParen: ")";
1525        id: "id";
1526        "#
1527        .parse()
1528        .unwrap()
1529    }
1530
1531    /// Grammar from the Dragon book, p.278
1532    /// This grammar is LR(1) but not LALR.
1533    /// See also: https://www.gnu.org/software/bison/manual/bison.html#Mysterious-Conflicts
1534    fn test_non_lalr_grammar() -> Grammar {
1535        r#"
1536        S: A "a" | "b" A "c" | B "c" | "b" B "a";
1537        A: "d";
1538        B: "d";
1539        terminals
1540        a_t: "a";
1541        b_t: "b";
1542        c_t: "c";
1543        d_t: "d";
1544        "#
1545        .parse()
1546        .unwrap()
1547    }
1548
1549    fn test_ambiguous_grammar() -> Grammar {
1550        r#"
1551        E: E "+" E {1, left}
1552            | E "*" E {2, left}
1553            | E "^" E {3, right}
1554            | "(" E ")"
1555            | "id";
1556
1557        terminals
1558        Plus: "+";
1559        Mul: "*";
1560        Power: "^";
1561        LParen: "(";
1562        RParen: ")";
1563        id: "id";
1564        "#
1565        .parse()
1566        .unwrap()
1567    }
1568
1569    #[test]
1570    fn test_first_sets() {
1571        let grammar = test_grammar();
1572        let first_sets = first_sets(&grammar);
1573
1574        assert_eq!(first_sets.len(), 13);
1575
1576        // First of terminal is just a terminal itself.
1577        assert_eq!(
1578            &first_sets[grammar.symbol_index("id")],
1579            &follow(grammar.symbol_indexes(&["id"]))
1580        );
1581
1582        assert_eq!(
1583            &first_sets[grammar.symbol_index("F")],
1584            &follow(grammar.symbol_indexes(&["LParen", "id"]))
1585        );
1586        assert_eq!(
1587            &first_sets[grammar.symbol_index("T")],
1588            &follow(grammar.symbol_indexes(&["LParen", "id"]))
1589        );
1590        assert_eq!(
1591            &first_sets[grammar.symbol_index("E")],
1592            &follow(grammar.symbol_indexes(&["LParen", "id"]))
1593        );
1594        assert_eq!(
1595            &first_sets[grammar.symbol_index("Ep")],
1596            &follow(grammar.symbol_indexes(&["Plus", "EMPTY"]))
1597        );
1598        assert_eq!(
1599            &first_sets[grammar.symbol_index("Tp")],
1600            &follow(grammar.symbol_indexes(&["Mul", "EMPTY"]))
1601        );
1602    }
1603
1604    #[test]
1605    fn test_follow_sets() {
1606        let grammar = test_grammar();
1607        let follow_sets = follow_sets(&grammar, &first_sets(&grammar));
1608
1609        assert_eq!(
1610            &follow_sets[grammar.symbol_index("E")],
1611            &follow(grammar.symbol_indexes(&["RParen", "STOP"]))
1612        );
1613        assert_eq!(
1614            &follow_sets[grammar.symbol_index("Ep")],
1615            &follow(grammar.symbol_indexes(&["RParen", "STOP"]))
1616        );
1617        assert_eq!(
1618            &follow_sets[grammar.symbol_index("T")],
1619            &follow(grammar.symbol_indexes(&["Plus", "RParen", "STOP"]))
1620        );
1621        assert_eq!(
1622            &follow_sets[grammar.symbol_index("Tp")],
1623            &follow(grammar.symbol_indexes(&["Plus", "RParen", "STOP"]))
1624        );
1625    }
1626
1627    #[test]
1628    fn test_prooduction_rn_lengths() {
1629        let grammar = test_grammar();
1630        let first_sets = first_sets(&grammar);
1631
1632        let production_rn_lengths = production_rn_lengths(&first_sets, &grammar);
1633
1634        // RN length of "E: T Ep" is 1 as "Ep" can derive EMPTY but "T" can't.
1635        assert_eq!(production_rn_lengths[ProdIndex(1)], 1);
1636
1637        // RN length of "Ep: "+" T Ep" is 2.
1638        assert_eq!(production_rn_lengths[ProdIndex(2)], 2);
1639    }
1640
1641    #[test]
1642    fn test_symbol_at_position() {
1643        let grammar = test_grammar();
1644
1645        let prod = ProdIndex(1);
1646        let mut item = LRItem::new(&grammar, prod, None);
1647        assert_eq!(
1648            &grammar.symbol_names(grammar.productions[prod].rhs_symbols()),
1649            &["T", "Ep"]
1650        );
1651        assert_eq!(
1652            item.symbol_at_position(&grammar).unwrap(),
1653            grammar.symbol_index("T")
1654        );
1655        item.position = 1;
1656        assert_eq!(
1657            &grammar.symbol_name(item.symbol_at_position(&grammar).unwrap()),
1658            "Ep"
1659        );
1660        item.position = 2;
1661        assert!(item.symbol_at_position(&grammar).is_none());
1662        item.position = 3;
1663        assert!(item.symbol_at_position(&grammar).is_none());
1664    }
1665
1666    #[test]
1667    fn test_group_per_next_symbol() {
1668        let grammar = test_ambiguous_grammar();
1669
1670        // Create some LR state
1671        let mut lr_state = LRState::new(&grammar, 0.into(), grammar.symbol_index("E"))
1672            .add_item(LRItem {
1673                prod: 1.into(),
1674                prod_len: grammar.production_len(1.into()),
1675                rn_len: None,
1676                position: 1,
1677                follow: RefCell::new(Follow::new()),
1678            })
1679            .add_item(LRItem {
1680                prod: 2.into(),
1681                prod_len: grammar.production_len(2.into()),
1682                rn_len: None,
1683                position: 1,
1684                follow: RefCell::new(Follow::new()),
1685            })
1686            .add_item(LRItem {
1687                prod: 3.into(),
1688                prod_len: grammar.production_len(2.into()),
1689                rn_len: None,
1690                position: 1,
1691                follow: RefCell::new(Follow::new()),
1692            })
1693            .add_item(LRItem {
1694                prod: 4.into(),
1695                prod_len: grammar.production_len(3.into()),
1696                rn_len: None,
1697                position: 2,
1698                follow: RefCell::new(Follow::new()),
1699            });
1700
1701        let per_next_symbol = lr_state.group_per_next_symbol();
1702
1703        // log!("Symbols: {:#?}", grammar.symbol_names(per_next_symbol.keys()));
1704        // Symbols: ["+", "*", ")"]
1705        //log!("Pernext: {:?}", per_next_symbol);
1706        // Pernext: {SymbolIndex(1): [ItemIndex(0)], SymbolIndex(2): [ItemIndex(1)], SymbolIndex(4): [ItemIndex(2)]}
1707
1708        // Check items grouping per symbol
1709        assert_eq!(per_next_symbol.len(), 4);
1710        assert_eq!(
1711            per_next_symbol.keys().cloned().collect::<Vec<_>>(),
1712            [1, 2, 3, 5]
1713                .iter()
1714                .map(|v| SymbolIndex(*v))
1715                .collect::<Vec<_>>()
1716        );
1717        assert_eq!(
1718            per_next_symbol.values().cloned().collect::<Vec<_>>(),
1719            vec![
1720                vec![0.into()],
1721                vec![1.into()],
1722                vec![2.into()],
1723                vec![3.into()]
1724            ]
1725        );
1726
1727        // Check production based term priorities
1728        assert_eq!(
1729            lr_state.max_prior_for_term
1730                [&grammar.symbol_to_term_index(grammar.term_by_name["Power"])],
1731            3
1732        );
1733        assert_eq!(
1734            lr_state.max_prior_for_term
1735                [&grammar.symbol_to_term_index(grammar.term_by_name["Mul"])],
1736            2
1737        );
1738        assert_eq!(
1739            lr_state.max_prior_for_term
1740                [&grammar.symbol_to_term_index(grammar.term_by_name["Plus"])],
1741            1
1742        );
1743    }
1744
1745    #[test]
1746    fn test_merge_states() {
1747        let grammar = test_grammar();
1748        let lr_item_1 = LRItem {
1749            prod: ProdIndex(1),
1750            prod_len: 2,
1751            rn_len: None,
1752            position: 2,
1753            follow: RefCell::new(Follow::new()),
1754        };
1755        let lr_item_2 = LRItem {
1756            prod: ProdIndex(2),
1757            prod_len: 3,
1758            rn_len: None,
1759            position: 3,
1760            follow: RefCell::new(Follow::new()),
1761        };
1762        let old_state = LRState::new(&grammar, 0.into(), 0.into())
1763            .add_item(LRItem {
1764                follow: RefCell::new(follow([1, 3])),
1765                ..lr_item_1
1766            })
1767            .add_item(LRItem {
1768                follow: RefCell::new(follow([2])),
1769                ..lr_item_2
1770            });
1771
1772        // This should be merged as there are no introduced R/R conflicts
1773        let new_state_1 = LRState::new(&grammar, 0.into(), 0.into())
1774            .add_item(LRItem {
1775                follow: RefCell::new(follow([1])),
1776                ..lr_item_1
1777            })
1778            .add_item(LRItem {
1779                follow: RefCell::new(follow([2, 4])),
1780                ..lr_item_2
1781            });
1782        let mut old_state_1 = old_state.clone();
1783        let settings = Settings::default();
1784        assert!(LRTable::merge_state(
1785            &settings,
1786            &mut old_state_1,
1787            &new_state_1
1788        ));
1789        // When the merge succeed verify that items follows are indeed extended.
1790        assert_eq!(
1791            *old_state_1.items[ItemIndex(0)].follow.borrow(),
1792            follow([1, 3])
1793        );
1794        assert_eq!(
1795            *old_state_1.items[ItemIndex(1)].follow.borrow(),
1796            follow([2, 4])
1797        );
1798
1799        // This merge introduces new R/R conflict as the second item has 1 in
1800        // the follow set. Term 1 exists in the first item of the old state so
1801        // merging will make two items eligible for reduction on the term 1 in
1802        // the input.
1803        let new_state_2 = LRState::new(&grammar, 0.into(), 0.into())
1804            .add_item(LRItem {
1805                follow: RefCell::new(follow([3])),
1806                ..lr_item_1
1807            })
1808            .add_item(LRItem {
1809                follow: RefCell::new(follow([2, 1])),
1810                ..lr_item_2
1811            });
1812        let mut old_state_2 = old_state.clone();
1813        assert!(!LRTable::merge_state(
1814            &settings,
1815            &mut old_state_2,
1816            &new_state_2
1817        ));
1818        // Verify that no merge happened
1819        assert_eq!(
1820            *old_state_2.items[ItemIndex(0)].follow.borrow(),
1821            follow([1, 3])
1822        );
1823        assert_eq!(
1824            *old_state_2.items[ItemIndex(1)].follow.borrow(),
1825            follow([2])
1826        );
1827
1828        // The last thing to check is situation where new state has R/R
1829        // conflicts and there are no additional merge introduced R/R conflicts.
1830        // This time we should merge as the R/R conflict is not introduced by
1831        // merge process but exists due to the grammar not being LR(1).
1832        let new_state_3 = LRState::new(&grammar, 0.into(), 0.into())
1833            .add_item(LRItem {
1834                follow: RefCell::new(follow([1, 3])),
1835                ..lr_item_1
1836            })
1837            .add_item(LRItem {
1838                follow: RefCell::new(follow([2, 1])),
1839                ..lr_item_2
1840            });
1841        let mut old_state_3 = old_state.clone();
1842        assert!(LRTable::merge_state(
1843            &settings,
1844            &mut old_state_3,
1845            &new_state_3
1846        ));
1847        // Verify that merge happened
1848        assert_eq!(
1849            *old_state_3.items[ItemIndex(0)].follow.borrow(),
1850            follow([1, 3])
1851        );
1852        assert_eq!(
1853            *old_state_3.items[ItemIndex(1)].follow.borrow(),
1854            follow([2, 1])
1855        );
1856    }
1857
1858    #[test]
1859    fn test_closure() {
1860        let grammar = test_grammar();
1861        let firsts = first_sets(&grammar);
1862
1863        // Create some LR state
1864        let mut lr_state =
1865            LRState::new(&grammar, StateIndex(0), grammar.symbol_index("T")).add_item(
1866                LRItem::with_follow(&grammar, ProdIndex(1), None, follow([grammar.stop_index])),
1867            );
1868
1869        lr_state.closure(&firsts, &None);
1870
1871        let prods = [1, 4, 7, 8];
1872        let follow_sets = [
1873            grammar.symbol_indexes(&["STOP"]),
1874            grammar.symbol_indexes(&["STOP", "Plus"]),
1875            grammar.symbol_indexes(&["STOP", "Plus", "Mul"]),
1876            grammar.symbol_indexes(&["STOP", "Plus", "Mul"]),
1877        ];
1878
1879        assert_eq!(lr_state.items.len(), 4);
1880
1881        itertools::izip!(&lr_state.items, prods, follow_sets).for_each(|(item, prod, follows)| {
1882            assert_eq!(item.prod, prod.into());
1883            assert!(item.follow.borrow().iter().eq(follows.iter()));
1884        });
1885
1886        log!("{:?}", lr_state);
1887    }
1888
1889    #[test]
1890    fn test_lr_states_for_grammar_2() {
1891        let grammar = test_grammar_2();
1892
1893        let settings = Settings::new().table_type(TableType::LALR);
1894
1895        let table = LRTable::new(&grammar, &settings).unwrap();
1896        output_cmp!(
1897            "src/table/grammar_2.expected",
1898            format!("{:#?}", table.states)
1899        );
1900    }
1901
1902    #[test]
1903    fn test_lr_states_for_non_lalr_grammar() {
1904        let grammar = test_non_lalr_grammar();
1905
1906        // Calculating LR tables with LALR method will result in a state with
1907        // R/R conflicts. So, deterministic LR parsing method cannot be used for
1908        // this grammar and LALR construction method.
1909        //
1910        // Conflicts are found in state 2 which is entered when 'd' is
1911        // recognized in the input. There are two R/R conflicts, for inputs 'a'
1912        // and 'c'. In both case parser may reduce both A and B.
1913        let settings = Settings::new().table_type(TableType::LALR);
1914
1915        let table = LRTable::new(&grammar, &settings).unwrap();
1916
1917        output_cmp!(
1918            "src/table/grammar_nonlalr_lalr.expected",
1919            format!("{grammar}\n\n{:#?}", table.states)
1920        );
1921
1922        // In LALR_PAGER construction method R/R conflicts are avoided during
1923        // merge phase where states are kept split if merging would introduce
1924        // new R/R conflict. This essentially makes LALR_PAGER very close in
1925        // power to canonical LR(1) but with the number of states which is
1926        // almost like in LALR (i.e. LR(0)).
1927        //
1928        // In this case we have 13 states while in previous LALR case there was
1929        // 12 states.
1930        let settings = Settings::new().table_type(TableType::LALR_PAGER);
1931
1932        let table = LRTable::new(&grammar, &settings).unwrap();
1933
1934        output_cmp!(
1935            "src/table/grammar_nonlalr_lalr_pagerw.expected",
1936            format!("{grammar}\n\n{:#?}", table.states)
1937        );
1938    }
1939
1940    #[test]
1941    fn test_sorted_terminals() {
1942        let grammar: Grammar = r#"
1943            S: A | C | B;
1944            terminals
1945            A: /\d+/;
1946            B: "bb";
1947            C: "c";
1948            "#
1949        .parse()
1950        .unwrap();
1951
1952        let settings = Settings::new().table_type(TableType::LALR_PAGER);
1953
1954        let table = LRTable::new(&grammar, &settings).unwrap();
1955        assert_eq!(
1956            &table.states[StateIndex(0)]
1957                .sorted_terminals
1958                .iter()
1959                .map(|i| i.0 .0)
1960                .collect::<Vec<_>>(),
1961            &vec![2, 3, 1]
1962        );
1963    }
1964}
rustemo_compiler/table/mod.rs

rustemo_compiler/table/
mod.rs