ielr 0.1.1

Table generation backend for LR parser generators
Documentation
/*
 * Copyright 2022 Arnaud Golfouse
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at https://mozilla.org/MPL/2.0/.
 */

use super::Nullables;
use crate::{
    input::{Grammar, Node, Symbol, Token},
    structures::{Map, Set, Visit},
};

/// Map a node `n ∈ Node` to `{ t ∈ Token ∣ n ⇒* tα }`
#[derive(Debug)]
pub(crate) struct FirstSets(Map<Node, Set<Token>>);

impl FirstSets {
    pub(super) fn new(grammar: &Grammar, nullables: &Nullables) -> Self {
        Self(
            grammar
                .get_all_nodes()
                .map(|node| (node, get_first_sets_for(grammar, nullables, node)))
                .collect(),
        )
    }
}

/// Computes `{ t ∈ Token ∣ node ⇒* tα }`
///
/// # Source
/// <http://hyacc.sourceforge.net/files/FirstK_APPLC13.pdf>
fn get_first_sets_for(grammar: &Grammar, nullables: &Nullables, node: Node) -> Set<Token> {
    let mut result: Set<Token> = Set::default();
    // TODO(perf): Maybe use another hasher
    let mut visit: Visit<Vec<Symbol>> = Visit::new();
    for (_, production) in grammar.get_node_productions(node) {
        visit.insert(up_to_non_nullable(nullables, &production.symbols).to_owned());
    }
    while let Some(symbols) = visit.pop() {
        for mut new_symbols in expand_first(grammar, &symbols) {
            new_symbols.truncate(up_to_non_nullable(nullables, &new_symbols).len());
            visit.insert(new_symbols);
        }
    }
    for symbols in visit {
        if let Some(Symbol::Token(token)) = symbols.first() {
            result.insert(*token);
        }
    }
    result
}

/// Returns the prefix of `symbols` up to (and including) the first symbol that is not
/// nullable.
///
/// If `symbols` contains only nullables symbols, return all of `symbols`.
fn up_to_non_nullable<'a>(nullables: &Nullables, symbols: &'a [Symbol]) -> &'a [Symbol] {
    let mut found_non_nullable = false;
    for (index, symbol) in symbols.iter().enumerate() {
        if found_non_nullable {
            return &symbols[..index];
        }
        if let Symbol::Node(n) = symbol {
            if nullables.is_nullable(*n) {
                continue;
            }
        }
        found_non_nullable = true;
    }
    symbols
}

/// Get all symbols slices obtained by replacing the first symbol of `symbols` with all
/// its direct derivations.
///
/// If `symbols` is empty or starts with a [`Token`], returns an empty `Vec`.
fn expand_first(grammar: &Grammar, symbols: &[Symbol]) -> Vec<Vec<Symbol>> {
    let (symbol, after) = match symbols.split_first() {
        Some(x) => x,
        None => return Vec::new(),
    };
    match symbol {
        Symbol::Token(_) => Vec::new(),
        Symbol::Node(n) => {
            let mut result = Vec::new();
            for (_, production) in grammar.get_node_productions(*n) {
                let mut new_symbols = Vec::with_capacity(production.symbols.len() + after.len());
                new_symbols.extend_from_slice(&production.symbols);
                new_symbols.extend_from_slice(after);
                result.push(new_symbols);
            }
            result
        }
    }
}

impl std::ops::Deref for FirstSets {
    type Target = Map<Node, Set<Token>>;

    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::{
        input::{
            Grammar,
            Symbol::{Node as N, Token as T},
        },
        test_common::{
            nodes::{E, FUNCTION, ITEM, ITEMS, N1, N2, N3, N4, START, VISIBILITY},
            tokens::{FN, INT, MINUS, PLUS, PUB, T1, T2, T3},
        },
    };

    #[test]
    fn derive_tokens_expression() {
        let mut grammar = Grammar::new();
        for (lhs, rhs) in [
            (START, vec![N(E)]),
            (E, vec![T(INT)]),
            (E, vec![T(MINUS), N(E)]),
            (E, vec![N(E), T(PLUS), N(E)]),
            (E, vec![N(E), T(MINUS), N(E)]),
        ] {
            grammar.add_production(lhs, rhs).unwrap();
        }
        let nullables = Nullables::new(&grammar);

        let set_direct = get_first_sets_for(&grammar, &nullables, E);
        let first_sets = FirstSets::new(&grammar, &nullables);
        let set_via_first_sets = first_sets.get(&E).unwrap();
        assert_eq!(set_direct, {
            let mut set = Set::default();
            set.insert(INT);
            set.insert(MINUS);
            set
        });
        assert_eq!(set_direct, *set_via_first_sets);
    }

    #[test]
    fn first_nullables() {
        let mut grammar = Grammar::new();
        for (lhs, rhs) in [
            (START, vec![N(N1), N(N2)]),
            (N1, vec![N(N3), N(N1)]),
            (N1, vec![]),
            (N2, vec![T(T2), N(N4)]),
            (N3, vec![T(T3)]),
            (N4, vec![T(T1)]),
            (N4, vec![N(N1)]),
        ] {
            grammar.add_production(lhs, rhs).unwrap();
        }
        let nullables = Nullables::new(&grammar);

        for &(symbols, len) in &[
            (&[] as &[_], 0),
            (&[N(START), T(T1)], 1),
            (&[N(N1), N(N2), T(T1)], 2),
            (&[N(N3), T(T1)], 1),
            (&[N(N1), N(N4), N(N2), T(T1)], 3),
        ] {
            assert_eq!(
                up_to_non_nullable(&nullables, symbols).len(),
                len,
                "symbols = {symbols:?}"
            );
        }
    }

    #[test]
    fn derive_tokens_items() {
        let mut grammar = Grammar::new();
        for (lhs, rhs) in [
            (START, vec![N(ITEMS)]),
            (ITEMS, vec![N(ITEM), N(ITEMS)]),
            (ITEMS, vec![]),
            (ITEM, vec![N(FUNCTION)]),
            (FUNCTION, vec![N(VISIBILITY), T(FN)]),
            (VISIBILITY, vec![T(PUB)]),
            (VISIBILITY, vec![]),
        ] {
            grammar.add_production(lhs, rhs).unwrap();
        }
        let nullables = Nullables::new(&grammar);

        assert_eq!(get_first_sets_for(&grammar, &nullables, ITEM), {
            let mut set = Set::default();
            set.insert(FN);
            set.insert(PUB);
            set
        });
    }
}