ielr 0.1.1

Table generation backend for LR parser generators
Documentation
/*
 * Copyright 2022 Arnaud Golfouse
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at https://mozilla.org/MPL/2.0/.
 */

//! Computation of data specific to the grammar.
//!
//! This includes nullables nodes, or the FIRST₁ sets.

mod conflict_resolution;
mod first_set;
mod nullable;

use self::{first_set::FirstSets, nullable::Nullables};
use crate::{
    input::{Grammar, Node, ProdIdx, Symbol, Token},
    output::error::CycleError,
    structures::{Map, Set},
};
use std::ops;

pub(crate) use self::conflict_resolution::{
    ConflictResolutionCache, ContributionIndex, ContributionSelection, ReduceIdx, ShiftOrReduce,
};

#[derive(Debug)]
pub(crate) struct GrammarData<'a> {
    /// Reference to the grammar.
    pub(crate) grammar: &'a Grammar,
    /// Which nodes are nullable.
    pub(crate) nullables: Nullables,
    first_sets: FirstSets,
}

impl<'a> GrammarData<'a> {
    pub(crate) fn new(grammar: &'a Grammar) -> Result<Self, CycleError> {
        let nullables = Nullables::new(grammar);
        Self::check_cycles(grammar, &nullables)?;
        let first_sets = FirstSets::new(grammar, &nullables);
        Ok(Self {
            grammar,
            nullables,
            first_sets,
        })
    }

    /// Returns `{ t ∈ Tokens ∣ symbols ⇒* tα }`
    ///
    /// FIXME: precedence may forbid some derived tokens. This is very unlikely to
    /// happen in real life grammars though.
    ///
    /// Precision : for example, if the precedence of an hypothetical prefix operator
    /// `'@'` is weaker than `'+'`, then we can't derive `E → E + E → @ E + E` : thus
    /// `E + E` should not derive `@`.
    pub(crate) fn derived_tokens(&self, symbols: &[Symbol]) -> Set<Token> {
        let mut result = Set::default();
        for symbol in symbols {
            match symbol {
                Symbol::Token(t) => {
                    result.insert(*t);
                    break;
                }
                Symbol::Node(n) => {
                    if let Some(set) = self.first_sets.get(n) {
                        result.extend(set);
                    }
                    if !self.nullables.is_nullable(*n) {
                        break;
                    }
                }
            }
        }
        result
    }

    /// Check for `A ⇒+ A` derivations.
    // TODO: use conflict_resolution::graph_has_cycle ?
    fn check_cycles(grammar: &Grammar, nullables: &Nullables) -> Result<(), CycleError> {
        // Map `A` to `{ B ∣ A ⇒ αBβ, with α ⇒* ε, β ⇒* ε }`.
        let mut matches_single_node: Map<Node, Set<(Node, ProdIdx)>> = Map::default();

        'prod: for (prod_idx, rhs) in grammar.get_all_productions() {
            let lhs = prod_idx.lhs;
            let mut matched_nodes = Set::default();
            let mut nullable = true;
            for symbol in rhs {
                match *symbol {
                    Symbol::Token(_) => continue 'prod,
                    Symbol::Node(n) => {
                        let is_nullable = nullables.is_nullable(n);
                        if nullable {
                            matched_nodes.insert(n);
                            nullable = is_nullable;
                        } else if !is_nullable {
                            continue 'prod;
                        }
                    }
                }
            }
            matches_single_node
                .entry(lhs)
                .or_default()
                .extend(matched_nodes.into_iter().map(|node| (node, prod_idx)));
        }
        /// By using `matches_single_node`, go through the graph of `A ⇒ B` nodes, and
        /// tries to find `node`.
        ///
        /// # Returns
        /// The path necessary to derive `node ⇒+ node`.
        fn find_self_match(
            matches_single_node: &Map<Node, Set<(Node, ProdIdx)>>,
            node: Node,
            at: Node,
            visited: &mut Set<Node>,
        ) -> Option<Vec<ProdIdx>> {
            let matched = matches_single_node.get(&at)?;
            for &(n, prod) in matched {
                if n == node {
                    return Some(vec![prod]);
                }
                if visited.insert(n) {
                    if let Some(mut path) = find_self_match(matches_single_node, node, n, visited) {
                        path.push(prod);
                        return Some(path);
                    }
                }
            }
            None
        }
        for &node in matches_single_node.keys() {
            if let Some(mut path) =
                find_self_match(&matches_single_node, node, node, &mut Set::default())
            {
                path.reverse();
                return Err(CycleError { node, path });
            }
        }
        Ok(())
    }
}

impl<'a> ops::Deref for GrammarData<'a> {
    type Target = Grammar;

    fn deref(&self) -> &Self::Target {
        self.grammar
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::{
        input::Symbol::{Node as N, Token as T},
        test_common::{
            nodes::{N1, N2, N3, START},
            tokens::T1,
        },
    };

    #[test]
    fn cycle_error() {
        let mut grammar = Grammar::new();
        let prod = grammar.add_production(START, vec![N(START)]).unwrap();
        assert_eq!(
            GrammarData::new(&grammar).unwrap_err(),
            CycleError {
                node: START,
                path: vec![prod]
            }
        );

        let mut grammar = Grammar::new();
        for (lhs, rhs) in [
            (START, vec![N(N1)]),
            (N1, vec![N(N2), T(T1)]),
            (N1, vec![N(N2), N(N3)]),
            (N2, vec![N(N3)]),
            (N3, vec![]),
            (N3, vec![N(N1)]),
        ] {
            grammar.add_production(lhs, rhs).unwrap();
        }
        assert!(GrammarData::new(&grammar).is_err());
    }
}