use super::Nullables;
use crate::{
input::{Grammar, Node, Symbol, Token},
structures::{Map, Set, Visit},
};
#[derive(Debug)]
pub(crate) struct FirstSets(Map<Node, Set<Token>>);
impl FirstSets {
pub(super) fn new(grammar: &Grammar, nullables: &Nullables) -> Self {
Self(
grammar
.get_all_nodes()
.map(|node| (node, get_first_sets_for(grammar, nullables, node)))
.collect(),
)
}
}
fn get_first_sets_for(grammar: &Grammar, nullables: &Nullables, node: Node) -> Set<Token> {
let mut result: Set<Token> = Set::default();
let mut visit: Visit<Vec<Symbol>> = Visit::new();
for (_, production) in grammar.get_node_productions(node) {
visit.insert(up_to_non_nullable(nullables, &production.symbols).to_owned());
}
while let Some(symbols) = visit.pop() {
for mut new_symbols in expand_first(grammar, &symbols) {
new_symbols.truncate(up_to_non_nullable(nullables, &new_symbols).len());
visit.insert(new_symbols);
}
}
for symbols in visit {
if let Some(Symbol::Token(token)) = symbols.first() {
result.insert(*token);
}
}
result
}
fn up_to_non_nullable<'a>(nullables: &Nullables, symbols: &'a [Symbol]) -> &'a [Symbol] {
let mut found_non_nullable = false;
for (index, symbol) in symbols.iter().enumerate() {
if found_non_nullable {
return &symbols[..index];
}
if let Symbol::Node(n) = symbol {
if nullables.is_nullable(*n) {
continue;
}
}
found_non_nullable = true;
}
symbols
}
fn expand_first(grammar: &Grammar, symbols: &[Symbol]) -> Vec<Vec<Symbol>> {
let (symbol, after) = match symbols.split_first() {
Some(x) => x,
None => return Vec::new(),
};
match symbol {
Symbol::Token(_) => Vec::new(),
Symbol::Node(n) => {
let mut result = Vec::new();
for (_, production) in grammar.get_node_productions(*n) {
let mut new_symbols = Vec::with_capacity(production.symbols.len() + after.len());
new_symbols.extend_from_slice(&production.symbols);
new_symbols.extend_from_slice(after);
result.push(new_symbols);
}
result
}
}
}
impl std::ops::Deref for FirstSets {
type Target = Map<Node, Set<Token>>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::{
input::{
Grammar,
Symbol::{Node as N, Token as T},
},
test_common::{
nodes::{E, FUNCTION, ITEM, ITEMS, N1, N2, N3, N4, START, VISIBILITY},
tokens::{FN, INT, MINUS, PLUS, PUB, T1, T2, T3},
},
};
#[test]
fn derive_tokens_expression() {
let mut grammar = Grammar::new();
for (lhs, rhs) in [
(START, vec![N(E)]),
(E, vec![T(INT)]),
(E, vec![T(MINUS), N(E)]),
(E, vec![N(E), T(PLUS), N(E)]),
(E, vec![N(E), T(MINUS), N(E)]),
] {
grammar.add_production(lhs, rhs).unwrap();
}
let nullables = Nullables::new(&grammar);
let set_direct = get_first_sets_for(&grammar, &nullables, E);
let first_sets = FirstSets::new(&grammar, &nullables);
let set_via_first_sets = first_sets.get(&E).unwrap();
assert_eq!(set_direct, {
let mut set = Set::default();
set.insert(INT);
set.insert(MINUS);
set
});
assert_eq!(set_direct, *set_via_first_sets);
}
#[test]
fn first_nullables() {
let mut grammar = Grammar::new();
for (lhs, rhs) in [
(START, vec![N(N1), N(N2)]),
(N1, vec![N(N3), N(N1)]),
(N1, vec![]),
(N2, vec![T(T2), N(N4)]),
(N3, vec![T(T3)]),
(N4, vec![T(T1)]),
(N4, vec![N(N1)]),
] {
grammar.add_production(lhs, rhs).unwrap();
}
let nullables = Nullables::new(&grammar);
for &(symbols, len) in &[
(&[] as &[_], 0),
(&[N(START), T(T1)], 1),
(&[N(N1), N(N2), T(T1)], 2),
(&[N(N3), T(T1)], 1),
(&[N(N1), N(N4), N(N2), T(T1)], 3),
] {
assert_eq!(
up_to_non_nullable(&nullables, symbols).len(),
len,
"symbols = {symbols:?}"
);
}
}
#[test]
fn derive_tokens_items() {
let mut grammar = Grammar::new();
for (lhs, rhs) in [
(START, vec![N(ITEMS)]),
(ITEMS, vec![N(ITEM), N(ITEMS)]),
(ITEMS, vec![]),
(ITEM, vec![N(FUNCTION)]),
(FUNCTION, vec![N(VISIBILITY), T(FN)]),
(VISIBILITY, vec![T(PUB)]),
(VISIBILITY, vec![]),
] {
grammar.add_production(lhs, rhs).unwrap();
}
let nullables = Nullables::new(&grammar);
assert_eq!(get_first_sets_for(&grammar, &nullables, ITEM), {
let mut set = Set::default();
set.insert(FN);
set.insert(PUB);
set
});
}
}