ielr 0.1.1

Table generation backend for LR parser generators
Documentation
/*
 * Copyright 2022 Arnaud Golfouse
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at https://mozilla.org/MPL/2.0/.
 */

mod item;
mod precedence_annotations;
mod state;

use crate::{
    grammar_data::GrammarData,
    indices::{ItemIdx, StateIdxRaw},
    input::{Node, Symbol},
    structures::{Map, Set},
    StateIdx,
};

pub(crate) use self::{item::LRItem, state::LRState};

#[allow(clippy::upper_case_acronyms)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub(crate) struct LALR;

#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub(crate) struct LR1;

#[derive(Debug)]
pub(crate) struct LRTable<Kind> {
    pub(crate) states: Vec<LRState<Kind>>,
    pub(crate) starting_states: Map<Node, StateIdx<Kind>>,
}

/// The number of states excedeed the maximum for [`StateIdx`].
#[derive(Clone, Copy, Debug)]
pub(crate) struct StatesOverflow;

impl LRTable<LALR> {
    /// Generates a new LR(0) states table for grammar `grammar_data`, that can start
    /// on any of `starting_nodes`.
    // TODO: tests
    pub(crate) fn new_lr0(
        grammar_data: &GrammarData,
        starting_nodes: Set<Node>,
    ) -> Result<Self, StatesOverflow> {
        let mut this = Self {
            states: Vec::new(),
            starting_states: Map::default(),
        };
        if starting_nodes.len().checked_mul(2).is_none() {
            return Err(StatesOverflow);
        }
        let mut accessing_symbols: Map<Symbol, Vec<StateIdx<LALR>>> = Map::default();
        for node in starting_nodes {
            let start_index = StateIdx::new(this.states.len() as StateIdxRaw);
            let end_index = StateIdx::new(this.states.len() as StateIdxRaw + 1);
            this.starting_states.insert(node, start_index);
            let start_state = {
                let mut items = Vec::new();
                for (prod_idx, _) in grammar_data.get_node_productions(node) {
                    items.push(LRItem::new(prod_idx, grammar_data));
                }
                let mut state = LRState::new(items, None);
                state.closure(grammar_data);
                state.transitions.insert(Symbol::Node(node), end_index);
                state
            };
            let end_state = {
                let items: Vec<_> = start_state
                    .items
                    .iter()
                    .filter_map(|item| {
                        if item.current_symbol(grammar_data) == Some(Symbol::Node(node)) {
                            Some(item.successor())
                        } else {
                            None
                        }
                    })
                    .collect();
                let mut state = LRState::new(items, Some(Symbol::Node(node)));
                state.closure(grammar_data);
                state
            };
            if !end_state.items.is_empty() {
                accessing_symbols
                    .entry(Symbol::Node(node))
                    .or_default()
                    .push(end_index);
            }
            this.states.push(start_state);
            this.states.push(end_state);
        }

        for index in 0.. {
            let state = match this.states.get_mut(index) {
                Some(s) => s,
                None => break,
            };
            'transition: for (transition_symbol, new_items) in state.successors(grammar_data) {
                let potential_states = accessing_symbols.entry(transition_symbol).or_default();
                for &potential_state in &*potential_states {
                    let state = &this.states[potential_state];
                    if state.compatible(&new_items) {
                        this.states[index]
                            .transitions
                            .insert(transition_symbol, potential_state);
                        continue 'transition;
                    }
                }
                if this.states.len() > StateIdxRaw::MAX as usize {
                    return Err(StatesOverflow);
                }
                let new_state_index = StateIdx::new(this.states.len() as StateIdxRaw);
                let mut new_state = LRState::new(new_items, Some(transition_symbol));
                new_state.closure(grammar_data);
                this.states.push(new_state);
                this.states[index]
                    .transitions
                    .insert(transition_symbol, new_state_index);
                potential_states.push(new_state_index);
            }
        }

        Ok(this)
    }
}

impl LRTable<LR1> {
    /// Creates a new `LR1Table`, without lookahead information.
    pub(crate) fn new_lr1(lr0_table: &LRTable<LALR>) -> Self {
        Self {
            states: lr0_table
                .states
                .iter()
                .map(|state| LRState {
                    items: state.items.clone(),
                    core_items_len: state.core_items_len,
                    transitions: state
                        .transitions
                        .iter()
                        .map(|(&symbol, &to)| (symbol, StateIdx::new(to.get())))
                        .collect(),
                    accessing_symbol: state.accessing_symbol,
                    uses_precedence: state.uses_precedence,
                })
                .collect(),
            starting_states: lr0_table
                .starting_states
                .iter()
                .map(|(&n, &s)| (n, StateIdx::new(s.get())))
                .collect(),
        }
    }
}

impl<Kind> LRTable<Kind> {
    /// Return an iterator of length `path.len()` over the items visited when following
    /// `path` starting at `from`.
    ///
    /// # Return
    /// - The next symbol in `path`.
    /// - The current state index `idx`.
    /// - `self.states[idx]`.
    /// - The current item.
    ///
    /// Note that this starts by returning
    /// `(path[0], from, self.states[from], from_item)`.
    pub(crate) fn follow_transitions<'a>(
        &'a self,
        from: StateIdx<Kind>,
        from_item: ItemIdx,
        path: &'a [Symbol],
    ) -> impl Iterator<Item = (Symbol, StateIdx<Kind>, &'a LRState<Kind>, &'a LRItem)> + 'a {
        let mut current_index = from;
        let mut current_item_index = from_item;
        path.iter().map(move |symbol: &Symbol| {
            let index = current_index;
            let state = &self.states[index];
            let item = &state.items[current_item_index as usize];
            current_index = state.transitions[symbol];
            current_item_index = item.next_item_local_index.unwrap();
            (*symbol, index, state, item)
        })
    }

    #[cfg(test)]
    pub(crate) fn get_state(&self, s: StateIdx<Kind>) -> Option<&LRState<Kind>> {
        self.states.get(s.get() as usize)
    }
}

impl<Kind> From<LRTable<Kind>> for crate::output::Table {
    fn from(table: LRTable<Kind>) -> Self {
        use crate::output::{Action, Item, Lookahead, State, StateIdx};
        Self {
            states: table
                .states
                .into_iter()
                .map(|state| {
                    let mut action_table = Map::default();
                    let mut goto_table = Map::default();
                    for (&symbol, &to) in &state.transitions {
                        let to = StateIdx::from(to);
                        match symbol {
                            Symbol::Token(token) => {
                                action_table.insert(Lookahead::Token(token), Action::Shift(to));
                            }
                            Symbol::Node(node) => {
                                goto_table.insert(node, to);
                            }
                        }
                    }
                    State {
                        items: state
                            .items
                            .into_iter()
                            .map(|item| Item {
                                production: item.prod_idx,
                                index: item.index,
                                lookaheads: item.lookaheads,
                            })
                            .collect(),
                        core_items_len: state.core_items_len as usize,
                        action_table,
                        goto_table,
                        accessing_symbol: state.accessing_symbol,
                    }
                })
                .collect(),
            starting_states: table
                .starting_states
                .into_iter()
                .map(|(node, s)| (node, s.into()))
                .collect(),
        }
    }
}