use std::mem::swap;
use std::iter::repeat;
use bit_set::BitSet;
use grammar::{Grammar, NonterminalId, Symbol};
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct FirstSets(Vec<FirstSet>);
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct FirstSet {
pub symbols: BitSet,
pub has_epsilon: bool,
}
impl FirstSets {
pub fn compute(grammar: &Grammar) -> FirstSets {
compute(grammar)
}
pub fn all(&self) -> &[FirstSet] {
&self.0
}
pub fn get(&self, nt: NonterminalId) -> &FirstSet {
&self.0[nt.as_usize()]
}
}
fn compute(grammar: &Grammar) -> FirstSets {
let num_term = grammar.terminal_id_bound();
let num_nonterm = grammar.nonterminal_id_bound();
let mut update = BitSet::with_capacity(num_nonterm);
let mut next_update = BitSet::with_capacity(num_nonterm);
for id in 0..num_nonterm {
update.insert(id);
}
let mut fs = FirstSets(
repeat(FirstSet {
symbols: BitSet::with_capacity(num_term),
has_epsilon: false,
}).take(num_nonterm)
.collect(),
);
let mut deps: Vec<BitSet> = repeat(BitSet::with_capacity(num_nonterm))
.take(num_nonterm)
.collect();
while !update.is_empty() {
for current in update.iter() {
if next_update.contains(current) {
continue;
}
let mut new_fs = fs.0[current].clone();
let mut no_rules = true;
for rule in grammar.rules().filter(|r| r.name().as_usize() == current) {
no_rules = false;
let tight = collect_symbols(rule.symbols(), &mut |symbol: &Symbol| match *symbol {
Symbol::Terminal(id) => {
new_fs.symbols.insert(id.as_usize());
true
}
Symbol::Nonterminal(id) => {
deps[id.as_usize()].insert(current);
let other = &fs.0[id.as_usize()];
new_fs.symbols.union_with(&other.symbols);
!other.has_epsilon
}
});
new_fs.has_epsilon |= !tight;
}
new_fs.has_epsilon |= no_rules;
if new_fs != fs.0[current] {
fs.0[current] = new_fs;
next_update.union_with(&deps[current]);
}
}
swap(&mut update, &mut next_update);
next_update.clear();
}
fs
}
fn collect_symbols<'a, I, F>(symbols: I, f: &mut F) -> bool
where
I: IntoIterator<Item = &'a Symbol>,
F: FnMut(&Symbol) -> bool,
{
for symbol in symbols {
let tight = match *symbol {
Symbol::Terminal(_) => f(symbol),
Symbol::Nonterminal(_) => f(symbol),
};
if tight {
return true;
}
}
false
}
#[cfg(test)]
#[allow(non_snake_case)]
mod test {
use super::*;
use grammar::Rule;
#[test]
fn simple_terminal() {
let mut g = Grammar::new();
let ntA = g.add_nonterminal("A");
let tb = g.add_terminal("b");
g.add_rule(Rule::new(ntA, vec![tb.into()]));
assert_eq!(
FirstSets::compute(&g),
FirstSets(vec![
FirstSet {
symbols: vec![tb.as_usize()].into_iter().collect(),
has_epsilon: false,
},
])
);
}
#[test]
fn simple_indirection() {
let mut g = Grammar::new();
let ntA = g.add_nonterminal("A");
let ntB = g.add_nonterminal("B");
let tc = g.add_terminal("c");
let td = g.add_terminal("d");
g.add_rule(Rule::new(ntA, vec![ntB.into()]));
g.add_rule(Rule::new(ntA, vec![td.into()]));
g.add_rule(Rule::new(ntB, vec![tc.into()]));
assert_eq!(
FirstSets::compute(&g),
FirstSets(vec![
FirstSet {
symbols: vec![tc.as_usize(), td.as_usize()].into_iter().collect(),
has_epsilon: false,
},
FirstSet {
symbols: vec![tc.as_usize()].into_iter().collect(),
has_epsilon: false,
},
])
);
}
#[test]
fn epsilon_rule_is_transparent() {
let mut g = Grammar::new();
let ntA = g.add_nonterminal("A");
let ntB = g.add_nonterminal("B");
let tc = g.add_terminal("c");
g.add_rule(Rule::new(ntA, vec![ntB.into(), tc.into()]));
assert_eq!(
FirstSets::compute(&g),
FirstSets(vec![
FirstSet {
symbols: vec![tc.as_usize()].into_iter().collect(),
has_epsilon: false,
},
FirstSet {
symbols: vec![].into_iter().collect(),
has_epsilon: true,
},
])
);
}
}