ielr 0.1.1

Table generation backend for LR parser generators
Documentation
/*
 * Copyright 2022 Arnaud Golfouse
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at https://mozilla.org/MPL/2.0/.
 */

use super::{ConflictSolution, Node, Symbol};
use crate::{
    indices::{ProdIdxRaw, SymbolIdx},
    structures::Map,
};

/// The type used for specifying precedence levels.
pub type PrecedenceLevel = u16;

#[derive(Clone, Debug, Default)]
pub(crate) struct PrecedenceLevels {
    /// Productions that cannot appear to the left of this production.
    pub(crate) left: Vec<Option<PrecedenceLevel>>,
    /// Productions that cannot appear to the right of this production.
    pub(crate) right: Vec<Option<PrecedenceLevel>>,
}

/// A production of the grammar.
///
/// This can be used to:
/// - Retreive the right-hand side of the associated production via [`Self::symbols`].
/// - Set precedence levels.
/// - Set extra forbidden derivations in the right-hand side.
#[derive(Clone, Debug)]
pub struct Production {
    /// The right-hand side of this production.
    pub symbols: Box<[Symbol]>,
    pub(crate) precedence: PrecedenceLevels,
    /// Contains a list of forbidden derivations.
    ///
    /// If this contains `(i, j)`, and `symbols[i]` is a [`Node`], then the `symbols[i]`
    /// cannot derive the `j`-th production associated with `symbols[i]` (aka
    /// `ProdIdx { lhs: symbols[i], index: j }`).
    pub(crate) forbidden_derivations: Vec<(SymbolIdx, ProdIdxRaw)>,
}

impl Production {
    /// Add a precedence for the current production.
    ///
    /// Any production (including this one) with
    /// - the same precedence family
    /// - strictly lower right precedence
    ///
    /// will not be allowed to appear to the left of this production.
    pub fn set_left_precedence(
        &mut self,
        precedence_family: PrecedenceFamilyToken,
        level: PrecedenceLevel,
    ) -> &mut Self {
        while self.precedence.left.len() <= precedence_family.0 {
            self.precedence.left.push(None);
        }
        self.precedence.left[precedence_family.0] = Some(level);
        self
    }

    /// Add a precedence for the current production.
    ///
    /// Any production (including this one) with
    /// - the same precedence family
    /// - strictly lower left precedence
    ///
    /// will not be allowed to appear to the right of this production.
    pub fn set_right_precedence(
        &mut self,
        precedence_family: PrecedenceFamilyToken,
        level: PrecedenceLevel,
    ) -> &mut Self {
        while self.precedence.right.len() <= precedence_family.0 {
            self.precedence.right.push(None);
        }
        self.precedence.right[precedence_family.0] = Some(level);
        self
    }

    /// The symbols at `symbol_index` of the production will not be able to derive
    /// `forbidden_production`.
    ///
    /// # Errors
    /// This will return an error if
    /// - `symbol_index` is out of bounds for [`Self::symbols`], or refers to a
    /// [`Token`](super::Token).
    /// - `symbol_index` refers to a different [`Node`] than `forbidden_production`.
    ///
    /// # Example
    /// In rust, it is forbidden to chain comparison operator. As such, one could write:
    /// ```
    /// # use ielr::input::{Node, Token, Symbol, Grammar};
    /// const E: Node = Node(0);
    /// const EQUAL: Token = match Token::new(1) {
    ///     Some(t) => t,
    ///     None => unreachable!()
    /// };
    /// let mut grammar = Grammar::new();
    /// let comparison_prod = grammar.add_production(E, vec![Symbol::Node(E), Symbol::Token(EQUAL), Symbol::Node(E)]).unwrap();
    /// let production = grammar.get_production_mut(comparison_prod).unwrap();
    /// production
    ///     .forbid_derivation(0, comparison_prod).unwrap()
    ///     .forbid_derivation(2, comparison_prod).unwrap();
    /// ```
    ///
    /// # Note
    /// This can be used to emulate operator precedence. However, it is recommended to
    /// use the [`Self::set_left_precedence`] and [`Self::set_right_precedence`] methods
    /// instead, as those are more versatile, 'safer' (they don't remove any valid
    /// parse), and lead to faster table generation.
    pub fn forbid_derivation(
        &mut self,
        symbol_index: usize,
        forbidden_production: ProdIdx,
    ) -> Result<&mut Self, ForbidDerivationError> {
        match self.symbols.get(symbol_index) {
            None | Some(Symbol::Token(_)) => Err(ForbidDerivationError),
            Some(Symbol::Node(n)) => {
                if *n == forbidden_production.lhs {
                    self.forbidden_derivations
                        .push((symbol_index as SymbolIdx, forbidden_production.index));
                    Ok(self)
                } else {
                    Err(ForbidDerivationError)
                }
            }
        }
    }
}

/// Token that indentifies a family of precedence annotations.
///
/// Used in [Production::set_left_precedence] and [Production::set_right_precedence].
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct PrecedenceFamilyToken(usize);

/// The grammar used to define the parser.
///
/// This contains **productions**: rules that describe how to create the various
/// syntactic objects.
///
/// A production has the form `node → symbols`, where `node` is a [`Node`] and
/// `symbols` is a list of [`Symbol`]s.
#[derive(Clone, Debug)]
pub struct Grammar {
    /// All the productions of the grammar.
    productions: Map<Node, Vec<Production>>,
    next_precedence_family: usize,
    /// Solutions for LR conflicts.
    pub(crate) conflict_solutions: Vec<ConflictSolution>,
}

/// Handle to a production in the [`Grammar`].
///
/// The left-hand side may be retrieved directly. However, to get the right-hand
/// side, one will need to call [`Grammar::get_rhs`].
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ProdIdx {
    /// Left-hand side of the production.
    pub lhs: Node,
    /// Index of the right-hand side of the production in
    /// [`grammar.productions[lhs]`](Grammar::productions).
    pub index: ProdIdxRaw,
}

/// Error while trying to add a new production to the grammar.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum AddProductionError {
    /// The right-hand side of the production exceeds [`u16::MAX`].
    RhsTooLong,
    /// There are already [`u16::MAX`] productions associated with the given left-hand
    /// side.
    TooManyProductions,
}

/// Error while trying to forbid a particular derivation, using
/// [`Production::forbid_derivation`].
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ForbidDerivationError;

impl Grammar {
    /// Creates a new grammar with no productions.
    pub fn new() -> Self {
        Self {
            productions: Map::default(),
            next_precedence_family: 0,
            conflict_solutions: Vec::new(),
        }
    }

    /// Add a production to the grammar, and returns a [`ProdIdx`] than can be used
    /// to retrieve it.
    ///
    /// Returns `Err(AddProductionError)` if `rhs.len() > u16::MAX`, or if there is
    /// already `u16::MAX` productions associated with `lhs`.
    pub fn add_production(
        &mut self,
        lhs: Node,
        rhs: Vec<Symbol>,
    ) -> Result<ProdIdx, AddProductionError> {
        let productions = self.productions.entry(lhs).or_default();
        if rhs.len() > SymbolIdx::MAX as usize {
            return Err(AddProductionError::RhsTooLong);
        }
        if productions.len() == ProdIdxRaw::MAX as usize {
            return Err(AddProductionError::TooManyProductions);
        }
        let index = productions.len() as ProdIdxRaw;
        productions.push(Production {
            symbols: rhs.into(),
            precedence: PrecedenceLevels::default(),
            forbidden_derivations: Vec::new(),
        });
        Ok(ProdIdx { lhs, index })
    }

    /// Add a solution to a LR conflict to the grammar.
    ///
    /// # Note
    /// When using conflict solutions, the [LALR](crate::Algorithm::Lalr) algorithm
    /// might remove seemingly valid parses.
    ///
    /// To avoid this, conflict resolution is only applied when using the
    /// [LR](crate::Algorithm::Lr) algorithm.
    pub fn add_conflict_solution(&mut self, conflict_solution: ConflictSolution) {
        self.conflict_solutions.push(conflict_solution);
    }

    /// Generate a new precedence family.
    ///
    /// Note that using multiple precedence families on the same production may not be
    /// a good idea.
    pub fn add_precedence_family(&mut self) -> PrecedenceFamilyToken {
        let family = PrecedenceFamilyToken(self.next_precedence_family);
        self.next_precedence_family += 1;
        family
    }

    /// Get all the nodes that appear as the right-hand side of a production in the
    /// grammar.
    pub fn get_all_nodes(&self) -> impl Iterator<Item = Node> + '_ {
        self.productions.keys().copied()
    }

    /// Get all conflict solutions added with [`Self::add_conflict_solution`].
    pub fn get_conflict_solutions(&self) -> &[ConflictSolution] {
        &self.conflict_solutions
    }

    /// Get all conflict solutions added with [`Self::add_conflict_solution`].
    pub fn get_conflict_solutions_mut(&mut self) -> &mut [ConflictSolution] {
        &mut self.conflict_solutions
    }

    /// Get the production associated with `prod_idx`.
    pub fn get_production(&self, prod_idx: ProdIdx) -> Option<&Production> {
        self.productions
            .get(&prod_idx.lhs)?
            .get(prod_idx.index as usize)
    }

    /// Get the production associated with `prod_idx`.
    pub fn get_production_mut(&mut self, prod_idx: ProdIdx) -> Option<&mut Production> {
        self.productions
            .get_mut(&prod_idx.lhs)?
            .get_mut(prod_idx.index as usize)
    }

    /// Get the right-hand side of the production associated with `prod_idx`.
    pub fn get_rhs(&self, prod_idx: ProdIdx) -> Option<&[Symbol]> {
        Some(
            &self
                .productions
                .get(&prod_idx.lhs)?
                .get(prod_idx.index as usize)?
                .symbols as &[_],
        )
    }

    /// Get all the productions in the grammar.
    pub fn get_all_productions(&self) -> impl Iterator<Item = (ProdIdx, &'_ [Symbol])> + '_ {
        self.productions.iter().flat_map(|(&n, productions)| {
            productions.iter().enumerate().map(move |(index, b)| {
                (
                    ProdIdx {
                        lhs: n,
                        index: index as ProdIdxRaw,
                    },
                    (&b.symbols as &[_]),
                )
            })
        })
    }

    /// Get all the productions for node `node`.
    pub fn get_node_productions(
        &self,
        node: Node,
    ) -> impl Iterator<Item = (ProdIdx, &'_ Production)> + '_ {
        self.productions
            .get(&node)
            .map(|productions| {
                productions
                    .iter()
                    .enumerate()
                    .map(move |(index, production)| {
                        let prod_idx = ProdIdx {
                            lhs: node,
                            index: index as ProdIdxRaw,
                        };
                        (prod_idx, production)
                    })
            })
            .into_iter()
            .flatten()
    }
}

impl Default for Grammar {
    fn default() -> Self {
        Self::new()
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_common::{nodes::START, tokens::T1};

    #[test]
    fn grammar_errors() {
        let mut grammar = Grammar::default();

        assert_eq!(
            grammar
                .add_production(START, vec![Symbol::Token(T1); SymbolIdx::MAX as usize + 1])
                .unwrap_err(),
            AddProductionError::RhsTooLong
        );
        for _ in 0..ProdIdxRaw::MAX {
            assert!(grammar.add_production(START, vec![]).is_ok());
        }
        assert_eq!(
            grammar.add_production(START, vec![]).unwrap_err(),
            AddProductionError::TooManyProductions
        );
    }
}