ielr 0.1.1

Table generation backend for LR parser generators
Documentation
/*
 * Copyright 2022 Arnaud Golfouse
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at https://mozilla.org/MPL/2.0/.
 */

//! Implementation of the structures and algorithms specific to IELR(1).

mod item_relation;
mod phase0;
mod phase1;
mod phase2;
mod phase3;
mod phase4;

pub(crate) use self::{
    phase0::Phase0, phase1::Phase1, phase2::Phase2, phase3::Phase3, phase4::Phase4,
};

#[cfg(test)]
mod tests {
    use crate::{
        input::{
            Grammar, Node, Symbol,
            Symbol::{Node as N, Token as T},
        },
        lr::{LRState, LRTable, LALR},
        GotoIdx, StateIdx,
    };

    pub(super) use crate::test_common::{
        nodes::{N1, N2, N3, N4, N5, N6, N7, N8, START},
        tokens::{T1, T2, T3, T4, T5},
    };

    pub(super) fn get_state<'a, Kind>(
        table: &'a LRTable<Kind>,
        path_from_start: &[Symbol],
    ) -> Option<(StateIdx<Kind>, &'a LRState<Kind>)> {
        let mut state_index = *table.starting_states.get(&START)?;
        let mut state = table.get_state(state_index)?;
        for symbol in path_from_start {
            state_index = *state.transitions.get(symbol)?;
            state = table.get_state(state_index)?;
        }
        Some((state_index, state))
    }

    /// Get a [`GotoIdx`] `g`, given
    /// - `state_index`: the value of `goto_data.from_state[g]`.
    /// - `node`: the value of `goto_data.symbol[g]`.
    pub(super) fn get_goto(
        goto_data: &GotoIndicesData<LALR>,
        state_index: StateIdx<LALR>,
        node: Node,
    ) -> Option<GotoIdx> {
        for ((&s, &index_node), g) in goto_data
            .from_state
            .iter()
            .zip(&goto_data.symbol)
            .zip(goto_data.get_all_goto_indices())
        {
            if s == state_index && index_node == node {
                return Some(g);
            }
        }
        None
    }

    /// Quickly declare all non-core items in a table.
    ///
    /// This will assert that these are all unique, and that we don't miss any.
    macro_rules! get_gotos {
        (in $table:expr;
            $(
                $($from_state:ident => $path_symbol:expr)? => $state_name:ident {
                    $($node:ident: $goto:ident),* $(,)?
                };
            )*
        ) => {
            use std::collections::HashMap;
            let (goto_data, _) = $crate::ielr::Phase0::compute_goto_idx_tables(&$table);
            let mut len = 0;
            let mut state_indices = HashMap::new();
            $(
                let (state_index, state) = get_gotos!(@if_empty $($from_state)? {
                    $crate::ielr::tests::get_state(&$table, &[]).unwrap()
                } else {
                    $(
                        let state_index = $table
                            .get_state(state_indices[stringify!($from_state)])
                            .unwrap()
                            .transitions[&$path_symbol];
                        let state = $table.get_state(state_index).unwrap();
                        (state_index, state)
                    )?
                });
                state_indices.insert(stringify!($state_name), state_index);

                $(
                    let $goto = if state.uses_precedence {
                        unreachable!("this macro cannot handle grammars with precedence annotations")
                    } else {
                        $crate::ielr::tests::get_goto(&goto_data, state_index, $node).unwrap()
                    };
                    len += 1;
                )*
                // no missing goto
                let all_nodes = [$($node),*];
                assert!(
                    state
                        .all_items()
                        .iter()
                        .all(|item| item.index > 0 || all_nodes.contains(&item.prod_idx.lhs))
                );
            )*
            // all gotos are unique
            assert_eq!([$($($goto,)*)*].into_iter().collect::<$crate::structures::Set<$crate::GotoIdx>>().len(), len);
        };
        (@if_empty { $($then:tt)* } else { $($otherwise:tt)* }) => {
            {$($then)*}
        };
        (@if_empty $x:tt { $($then:tt)* } else { $($otherwise:tt)* }) => {
            {$($otherwise)*}
        };
    }

    pub(super) use get_gotos;

    use super::phase0::GotoIndicesData;

    /// ```text
    /// START → T1 N1 N2 N6
    /// START → T2 N4
    /// N1 → T2
    /// N2 → N3
    /// N3 →
    /// N3 → T3
    /// N4 → N5 N3
    /// N5 → T4
    /// N6 → T1 T2
    /// N6 → T4
    /// ```
    ///
    /// The LALR state machine is: (order of states may change)
    /// ```text
    /// 0:  START →∙T1 N1 N2 N6
    ///     START →∙T2 N4
    /// 1:  START → T1∙N1 N2 N6
    ///     N1 →∙T2
    /// 2:  START → T2∙N4
    ///     N4 →∙N5 N3
    ///     N5 →∙T4
    /// 3:  START → T1 N1∙N2 N6
    ///     N2 →∙N3
    ///     N3 →∙
    ///     N3 →∙T3
    /// 4:  N1 → T2∙
    /// 5:  START → T2 N4∙
    /// 6:  N4 → N5∙N3
    ///     N3 →∙
    ///     N3 →∙T3
    /// 7:  N5 → T4∙
    /// 8:  START → T1 N1 N2∙N6
    ///     N6 →∙T1 T2
    ///     N6 →∙T4
    /// 9:  N2 → N3∙
    /// 10: N3 → T3∙
    /// 11: N4 → N5 N3∙
    /// 12: START → T1 N1 N2 N6∙
    /// 13: N6 → T1∙T2
    /// 14: N6 → T4∙
    /// 15: N6 → T1 T2∙
    /// ```
    pub(super) fn grammar_1() -> Grammar {
        let mut grammar = Grammar::new();
        for (lhs, rhs) in [
            (START, vec![T(T1), N(N1), N(N2), N(N6)]),
            (START, vec![T(T2), N(N4)]),
            (N1, vec![T(T2)]),
            (N2, vec![N(N3)]),
            (N3, vec![]),
            (N3, vec![T(T3)]),
            (N4, vec![N(N5), N(N3)]),
            (N5, vec![T(T4)]),
            (N6, vec![T(T1), T(T2)]),
            (N6, vec![T(T4)]),
        ] {
            grammar.add_production(lhs, rhs).unwrap();
        }

        grammar
    }

    /// ```text
    /// START → T1 N1 N2 N6
    /// N1 → N3
    /// N2 → T2 N4 N5
    /// N2 → T2 N6
    /// N3 →
    /// N3 → N7
    /// N4 → T1
    /// N5 →
    /// N5 → T5
    /// N6 → N7
    /// N7 → N8 N3
    /// N7 → T3
    /// N8 → T1
    /// ```
    ///
    /// The LALR state machine is: (order of states may change)
    /// ```text
    /// 0:  START →∙T1 N1 N2 N6    i0        { EOF }
    /// 1:  START → T1∙N1 N2 N6              { EOF }
    ///     N1 →∙N3                i1_1      { T2 }
    ///     N3 →∙                  i1_2      { T2 }
    ///     N3 →∙N7                i1_3      { T2 }
    ///     N7 →∙N8 N3             i1_4      { T2 }
    ///     N7 →∙T3                i1_5      { T2 }
    ///     N8 →∙T1                i1_6      { T1, T2, T3 }
    /// 2:  START → T1 N1∙N2 N6              { EOF }
    ///     N2 →∙T2 N4 N5          i2_1      { T1, T3 }
    ///     N2 →∙T2 N6             i2_2      { T1, T3 }
    /// 3:  START → T1 N1 N2∙N6              { EOF }
    ///     N6 →∙N7                i3_1      { EOF }
    ///     N7 →∙N8 N3             i3_2      { EOF }
    ///     N7 →∙T3                i3_3      { EOF }
    ///     N8 →∙T1                i3_4      { EOF, T1, T2, T3 }
    /// 4:  START → T1 N1 N2 N6∙             { EOF }
    /// 5:  N1 → N3∙                         { T2 }
    /// 6:  N3 → N7∙                         { EOF, T1, T2, T3 }
    /// 7:  N7 → N8∙N3                       { EOF, T1, T2, T3 }
    ///     N3 →∙                  i7_1      { EOF, T1, T2, T3 }
    ///     N3 →∙N7                i7_2      { EOF, T1, T2, T3 }
    ///     N7 →∙N8 N3             i7_3      { EOF, T1, T2, T3 }
    ///     N7 →∙T3                i7_4      { EOF, T1, T2, T3 }
    ///     N8 →∙T1                i7_5      { EOF, T1, T2, T3 }
    /// 8:  N7 → T3∙                         { EOF, T1, T2, T3 }
    /// 9:  N2 → T2∙N4 N5                    { T1, T3 }
    ///     N2 → T2∙N6                       { T1, T3 }
    ///     N4 →∙T1                i9_1      { T1, T3, T5 }
    ///     N6 →∙N7                i9_2      { T1, T3 }
    ///     N7 →∙N8 N3             i9_3      { T1, T3 }
    ///     N7 →∙T3                i9_4      { T1, T3 }
    ///     N8 →∙T1                i9_5      { T1, T3 }
    /// 10: N2 → T2 N4∙N5                    { T1, T3 }
    ///     N5 →∙                  i10_1     { T1, T3 }
    ///     N5 →∙T5                i10_2     { T1, T3 }
    /// 11: N2 → T2 N4 N5∙                   { T1, T3 }
    /// 12: N2 → T2 N6∙                      { T1, T3 }
    /// 13: N6 → N7∙                         { EOF, T1, T3 }
    /// 14: N8 → T1∙                         { EOF, T1, T2, T3 }
    /// 15: N7 → N8 N3∙                      { EOF, T1, T2, T3 }
    /// 16: N4 → T1∙                         { T1, T3, T5 }
    ///     N8 → T1∙                         { T1, T3 }
    /// 17: N5 → T5∙                         { T1, T3 }
    /// 18: END STATE
    /// ```
    pub(super) fn grammar_2() -> Grammar {
        let mut grammar = Grammar::new();
        for (lhs, rhs) in [
            (START, vec![T(T1), N(N1), N(N2), N(N6)]),
            (N1, vec![N(N3)]),
            (N2, vec![T(T2), N(N4), N(N5)]),
            (N2, vec![T(T2), N(N6)]),
            (N3, vec![]),
            (N3, vec![N(N7)]),
            (N4, vec![T(T1)]),
            (N5, vec![]),
            (N5, vec![T(T5)]),
            (N6, vec![N(N7)]),
            (N7, vec![N(N8), N(N3)]),
            (N7, vec![T(T3)]),
            (N8, vec![T(T1)]),
        ] {
            grammar.add_production(lhs, rhs).unwrap();
        }

        grammar
    }

    /// ```text
    /// START → T1 T2 N1
    /// START → T1 T2 N2
    /// N1 → T3
    /// N2 → T3
    /// ```
    ///
    /// The LALR state machine is: (order of states may change)
    /// ```text
    /// 0:  START →∙T1 T2 N1  { EOF }
    ///     START →∙T1 T2 N2  { EOF }
    /// 1:  START → T1∙T2 N1  { EOF }
    ///     START → T1∙T2 N2  { EOF }
    /// 2:  START → T1 T2∙N1  { EOF }
    ///     START → T1 T2∙N2  { EOF }
    ///     N1 →∙T3           { EOF }
    ///     N2 →∙T3           { EOF }
    /// 3:  N1 → T3∙          { EOF }
    ///     N2 → T3∙          { EOF }
    /// 4:  START → T1 T2 N1∙ { EOF }
    /// 5:  START → T1 T2 N2∙ { EOF }
    /// 6:  END STATE
    /// ```
    pub(super) fn grammar_3() -> Grammar {
        let mut grammar = Grammar::new();
        for (lhs, rhs) in [
            (START, vec![T(T1), T(T2), N(N1)]),
            (START, vec![T(T1), T(T2), N(N2)]),
            (N1, vec![T(T3)]),
            (N2, vec![T(T3)]),
        ] {
            grammar.add_production(lhs, rhs).unwrap();
        }

        grammar
    }
}