ielr 0.1.1

Table generation backend for LR parser generators
Documentation
/*
 * Copyright 2022 Arnaud Golfouse
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at https://mozilla.org/MPL/2.0/.
 */

use super::precedence_annotations::PrecedenceAnnotations;
use crate::{
    grammar_data::GrammarData,
    indices::{ItemIdx, SymbolIdx},
    input::{Grammar, Node, ProdIdx, Symbol},
    output::Lookahead,
    structures::{BitSet, Set},
};

#[derive(Clone, Debug, PartialEq)]
pub(crate) struct LRItem {
    /// The production of this item.
    ///
    /// This has the form `lhs → rhs`, where `rhs` may be retrieved using
    /// [`Grammar::get_rhs`](crate::input::Grammar::get_rhs).
    pub(crate) prod_idx: ProdIdx,
    /// At which point of [`production`](Self::prod_idx) we are.
    pub(crate) index: SymbolIdx,
    /// Which items in the same state are directly derived from this item.
    pub(crate) directly_derives: BitSet<ItemIdx>,
    /// Which items in the same state are _directly_ derived from this item.
    pub(crate) derives: BitSet<ItemIdx>,
    /// If the dot is _not_ on the rightmost position, this item will be involved in a
    /// state transition.
    ///
    /// Then, `next_item_local_index` contains the index of the shifted item (in the next state).
    pub(crate) next_item_local_index: Option<ItemIdx>,
    /// Annotations for precedence.
    pub(crate) annotations: PrecedenceAnnotations,
    /// All symbols in `rhs` _strictly_ before this index are nullable.
    ///
    /// Will be compared against `Self::index`.
    nullable_up_to: SymbolIdx,
    /// All symbols in `rhs` after this index are nullable.
    ///
    /// Will be compared against `Self::index`.
    nullable_after: SymbolIdx,
    pub(crate) lookaheads: Set<Lookahead>,
    /// This contains any node `A`, such that this item forbid some `A →∙...`
    /// derivation.
    pub(crate) forbidden_nodes: Set<Node>,
}

impl LRItem {
    pub(crate) fn current_symbol(&self, grammar: &Grammar) -> Option<Symbol> {
        grammar
            .get_rhs(self.prod_idx)
            .unwrap_or_default()
            .get(self.index as usize)
            .copied()
    }

    /// Creates a new LR(0) item, with `index` 0 (aka the dot is on the left).
    ///
    /// - The `PrecedenceAnnotations` will be those specified in the grammar.
    /// - `derives` will be empty.
    /// - `nullable_up_to` and `nullable_after` will be computed.
    pub(super) fn new(prod_idx: ProdIdx, grammar_data: &GrammarData) -> Self {
        let production = grammar_data.get_production(prod_idx).unwrap();
        let mut nullable_up_to = 0;
        let mut nullable_after = production.symbols.len() as SymbolIdx;
        while let Some(symbol) = production.symbols.get(nullable_up_to as usize) {
            if !grammar_data.nullables.is_nullable_symbol(*symbol) {
                break;
            }
            nullable_up_to += 1;
        }
        while nullable_after > 0 {
            let symbol = match production.symbols.get(nullable_after as usize - 1) {
                Some(s) => s,
                None => break,
            };
            if !grammar_data.nullables.is_nullable_symbol(*symbol) {
                break;
            }
            nullable_after -= 1;
        }
        Self {
            prod_idx,
            index: 0,
            annotations: PrecedenceAnnotations::default(),
            directly_derives: BitSet::new(),
            derives: BitSet::new(),
            next_item_local_index: None,
            nullable_up_to,
            nullable_after,
            lookaheads: Set::default(),
            forbidden_nodes: Set::default(),
        }
    }

    /// Get the (LR(0)) item obtained by shifting `self.index` to the right (`+1`).
    ///
    /// Assumes `self.index < self.production.len()`.
    ///
    /// This will:
    /// - Generate the appropriate `PrecedenceAnnotations`.
    /// - Clear `derives`.
    pub(crate) fn successor(&self) -> Self {
        let index = self.index + 1;
        Self {
            prod_idx: self.prod_idx,
            index,
            annotations: PrecedenceAnnotations {
                forbidden_left: self.annotations.forbidden_left.clone(),
                forbidden_right: self.annotations.forbidden_right.clone(),
            },
            directly_derives: BitSet::new(),
            derives: BitSet::new(),
            next_item_local_index: None,
            nullable_up_to: self.nullable_up_to,
            nullable_after: self.nullable_after,
            lookaheads: Set::default(),
            forbidden_nodes: Set::default(),
        }
    }

    /// Get all the productions derived from this item, with the _new_ precedence.
    ///
    /// After each iteration of the returned iterator, if a precedence annotation is
    /// used to forbid some `A →∙...` derivation, then:
    /// - `used_precedence` is set to `true`.
    /// - `A` is inserted into `forbidden_nodes`.
    pub(super) fn derived<'a>(
        &'a self,
        grammar: &'a Grammar,
        used_precedence: &'a mut bool,
        forbidden_nodes: &'a mut Set<Node>,
    ) -> impl Iterator<Item = (ProdIdx, PrecedenceAnnotations)> + 'a {
        struct MaybeIter<I>(Option<I>);
        impl<I: Iterator> Iterator for MaybeIter<I> {
            type Item = I::Item;
            fn next(&mut self) -> Option<Self::Item> {
                self.0.as_mut()?.next()
            }
        }

        let node = match self.current_symbol(grammar) {
            Some(Symbol::Node(node)) => node,
            _ => return MaybeIter(None),
        };
        let production = grammar.get_production(self.prod_idx).unwrap();

        // For example, E →∙E + E
        let is_on_left = self.index <= self.nullable_up_to;
        // For example, E → E +∙E
        let is_on_right = self.index + 1 >= self.nullable_after;

        let inherited_left = &self.annotations.forbidden_left;
        let production_left = &production.precedence.left;
        let inherited_right = &self.annotations.forbidden_right;
        let production_right = &production.precedence.right;

        MaybeIter(Some(grammar.get_node_productions(node).filter_map(
            move |(new_prod_idx, new_production)| {
                if production
                    .forbidden_derivations
                    .contains(&(self.index, new_prod_idx.index))
                {
                    *used_precedence = true;
                    forbidden_nodes.insert(new_prod_idx.lhs);
                    return None;
                }
                if is_on_left
                    && (!PrecedenceAnnotations::compatible_with(
                        production_left,
                        &new_production.precedence.right,
                    ) || !PrecedenceAnnotations::compatible_with(
                        inherited_left,
                        &new_production.precedence.left,
                    ))
                {
                    *used_precedence = true;
                    return None;
                }
                if is_on_right
                    && (!PrecedenceAnnotations::compatible_with(
                        production_right,
                        &new_production.precedence.left,
                    ) || !PrecedenceAnnotations::compatible_with(
                        inherited_right,
                        &new_production.precedence.right,
                    ))
                {
                    *used_precedence = true;
                    return None;
                }

                // This is useful to prune useless annotations, for example on E →∙INT or left annotations on E →∙- E
                let can_derive_on_left =
                    matches!(new_production.symbols.first(), Some(Symbol::Node(_)));
                let can_derive_on_right =
                    matches!(new_production.symbols.last(), Some(Symbol::Node(_)));

                let mut new_annotations = PrecedenceAnnotations::new();
                if is_on_left {
                    // For example:
                    // E →∙E + E
                    // E →∙- E      ← new item: what was forbidden on the left is now forbidden on the right.
                    if can_derive_on_left {
                        PrecedenceAnnotations::merge_increasing(
                            &mut new_annotations.forbidden_left,
                            inherited_left,
                        );
                    }
                    if can_derive_on_right {
                        PrecedenceAnnotations::merge_increasing(
                            &mut new_annotations.forbidden_right,
                            production_left,
                        );
                    }
                }
                if is_on_right {
                    // For example:
                    // E → E +∙E
                    // E →∙E ?      ← new item: what was forbidden on the right is now forbidden on the left.
                    if can_derive_on_right {
                        PrecedenceAnnotations::merge_increasing(
                            &mut new_annotations.forbidden_right,
                            inherited_right,
                        );
                    }
                    if can_derive_on_left {
                        PrecedenceAnnotations::merge_increasing(
                            &mut new_annotations.forbidden_left,
                            production_right,
                        );
                    }
                }
                Some((new_prod_idx, new_annotations))
            },
        )))
    }

    /// Returns an ordering on [`LRItem`], such that items are sorted by:
    /// - [`Self::index`] in reverse.
    /// - In case of equal `index`, sort by [`Self::prod_idx`].
    pub(super) fn order(item1: &Self, item2: &Self) -> std::cmp::Ordering {
        item1
            .index
            .cmp(&item2.index)
            .reverse()
            .then(item1.annotations.cmp(&item2.annotations))
            .then(item1.prod_idx.cmp(&item2.prod_idx))
    }
}