ielr 0.1.1

Table generation backend for LR parser generators
Documentation
/*
 * Copyright 2022 Arnaud Golfouse
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at https://mozilla.org/MPL/2.0/.
 */

//! Compute which nodes are nullable.
//!
//! A nullable node is one that can derive ε.

use crate::{
    input::{Grammar, Node, Symbol},
    structures::Set,
};

/// Holds which nodes are nullables.
#[derive(Debug)]
pub(crate) struct Nullables {
    nullable_nodes: Set<Node>,
}

impl Nullables {
    pub(crate) fn new(grammar: &Grammar) -> Self {
        Self {
            nullable_nodes: nullables(grammar),
        }
    }

    pub(crate) fn is_nullable(&self, node: Node) -> bool {
        self.nullable_nodes.contains(&node)
    }

    pub(crate) fn is_nullable_symbol(&self, symbol: Symbol) -> bool {
        match symbol {
            Symbol::Token(_) => false,
            Symbol::Node(node) => self.nullable_nodes.contains(&node),
        }
    }

    /// Returns `true` if all symbols in `symbols` are nullables.
    pub(crate) fn are_nullable_symbols(&self, symbols: &[Symbol]) -> bool {
        for symbol in symbols {
            match symbol {
                Symbol::Token(_) => return false,
                Symbol::Node(non_term) => {
                    if !self.nullable_nodes.contains(non_term) {
                        return false;
                    }
                }
            }
        }
        true
    }
}

/// Get the set of nullable nodes for `grammar`.
fn nullables(grammar: &Grammar) -> Set<Node> {
    let (potentially_empty, empty) = {
        let mut potentially_empty = Vec::new();
        let mut empty = Set::default();
        for (prod_idx, rhs) in grammar.get_all_productions() {
            if rhs.is_empty() {
                empty.insert(prod_idx.lhs);
            } else {
                let mut nodes = Vec::new();
                let mut only_nodes = true;
                for symbol in rhs.iter() {
                    match symbol {
                        Symbol::Node(n) => nodes.push(*n),
                        Symbol::Token(_) => {
                            only_nodes = false;
                            break;
                        }
                    }
                }
                if only_nodes {
                    potentially_empty.push((prod_idx.lhs, nodes))
                }
            }
        }

        (potentially_empty, empty)
    };

    let mut result = empty;
    let mut modified = true;
    // Not very subtle, but eh
    while modified {
        modified = false;
        for (node, production_rhs) in &potentially_empty {
            if production_rhs.iter().all(|n| result.contains(n)) {
                modified |= result.insert(*node);
            }
        }
    }
    result
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_common::{
        nodes::{N1, N2, N3, N4, START},
        tokens::{T1, T2, T3},
    };

    #[test]
    fn nullables_nodes() {
        let mut grammar = Grammar::new();
        for (lhs, rhs) in [
            (START, vec![Symbol::Node(N1), Symbol::Node(N2)]),
            (N1, vec![Symbol::Token(T1)]),
            (N2, vec![]),
            (N2, vec![Symbol::Token(T2)]),
            (N2, vec![Symbol::Node(N3), Symbol::Node(N4)]),
            (N2, vec![Symbol::Node(N3), Symbol::Node(N4)]),
            (N3, vec![]),
            (N3, vec![Symbol::Token(T3)]),
            (N4, vec![]),
        ] {
            grammar.add_production(lhs, rhs).unwrap();
        }
        let result: Set<_> = [N2, N3, N4].into_iter().collect();
        assert_eq!(nullables(&grammar), result);
    }

    #[test]
    fn nullable_symbols() {
        let mut grammar = Grammar::new();
        for (lhs, rhs) in [
            (START, vec![Symbol::Node(N1), Symbol::Node(N2)]),
            (N1, vec![Symbol::Token(T1)]),
            (N2, vec![]),
            (N2, vec![Symbol::Token(T2)]),
            (N2, vec![Symbol::Node(N3), Symbol::Node(N4)]),
            (N2, vec![Symbol::Node(N3), Symbol::Node(N4)]),
            (N3, vec![]),
            (N3, vec![Symbol::Token(T3)]),
            (N4, vec![]),
        ] {
            grammar.add_production(lhs, rhs).unwrap();
        }
        let nullables = Nullables::new(&grammar);
        assert!(nullables.are_nullable_symbols(&[
            Symbol::Node(N2),
            Symbol::Node(N3),
            Symbol::Node(N4),
        ]));
        assert!(!nullables.are_nullable_symbols(&[
            Symbol::Node(N2),
            Symbol::Node(N3),
            Symbol::Token(T3),
            Symbol::Node(N4),
        ]));
        assert!(!nullables.are_nullable_symbols(&[
            Symbol::Node(N2),
            Symbol::Node(N3),
            Symbol::Node(N4),
            Symbol::Token(T1),
        ]));
    }
}