use std;
use std::fmt;
use std::ops::{Index, IndexMut};
use std::collections::{BTreeSet, HashSet};
use std::iter::{once, repeat};
use std::mem::replace;
use bit_set::BitSet;
use Pretty;
use grammar::{self, Grammar, NonterminalId, RuleId, Symbol, TerminalId};
use first::FirstSets;
use honalee;
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct ItemSets(Vec<ItemSet>);
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct ItemSet {
pub(crate) id: ItemSetId,
pub(crate) items: Vec<Item>,
pub(crate) kernel: usize,
}
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub struct Item {
pub(crate) rule: RuleId,
pub(crate) lookahead: TerminalId,
pub(crate) marker: usize,
pub(crate) action: Option<(Symbol, Action)>,
}
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum Action {
Shift(ItemSetId),
Reduce(RuleId),
}
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ItemSetId(usize);
impl ItemSets {
pub fn new(sets: Vec<ItemSet>) -> ItemSets {
ItemSets(sets)
}
pub fn compute(grammar: &Grammar) -> ItemSets {
honalee::construct_item_sets(grammar)
}
pub fn all(&self) -> &[ItemSet] {
&self.0
}
pub fn pretty<'a>(&'a self, grammar: &'a Grammar) -> Pretty<&'a Grammar, &'a Self> {
Pretty::new(grammar, self)
}
pub fn compress(&mut self) {
for is in &mut self.0 {
is.compress();
}
}
}
impl fmt::Debug for ItemSetId {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "i{}", self.0)
}
}
impl Index<ItemSetId> for ItemSets {
type Output = ItemSet;
fn index(&self, index: ItemSetId) -> &ItemSet {
&self.0[index.as_usize()]
}
}
impl IndexMut<ItemSetId> for ItemSets {
fn index_mut(&mut self, index: ItemSetId) -> &mut ItemSet {
&mut self.0[index.as_usize()]
}
}
impl<'a> fmt::Display for Pretty<&'a Grammar, &'a ItemSets> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
for (is, sep) in self.item.0.iter().zip(once("").chain(repeat("\n"))) {
write!(f, "{}{}", sep, is.pretty(self.ctx))?;
}
Ok(())
}
}
impl ItemSet {
pub fn new(id: ItemSetId) -> ItemSet {
ItemSet {
id: id,
items: Vec::new(),
kernel: 0,
}
}
pub fn with_items(id: ItemSetId, items: Vec<Item>) -> ItemSet {
let mut set = ItemSet::new(id);
set.kernel = items.len();
set.items = items;
set
}
pub fn id(&self) -> ItemSetId {
self.id
}
pub fn items(&self) -> &[Item] {
&self.items
}
pub fn kernel_items(&self) -> &[Item] {
&self.items[0..self.kernel]
}
pub fn pretty<'a>(&'a self, grammar: &'a Grammar) -> Pretty<&'a Grammar, &'a Self> {
Pretty::new(grammar, self)
}
pub fn closure(&mut self, grammar: &Grammar, first_sets: &FirstSets) {
compute_closure(self, grammar, first_sets)
}
pub fn kernel_item_cores(&self) -> KernelCores {
let set: BTreeSet<_> = self.items[0..self.kernel]
.iter()
.map(|item| (item.rule, item.marker))
.collect();
KernelCores(set.into_iter().collect())
}
pub fn replace_actions(&mut self, from: Action, to: Action) {
for &mut (_, ref mut action) in self.actions_mut() {
if *action == from {
*action = to;
}
}
}
pub fn actions(&self) -> Actions {
Actions(self.items.iter())
}
pub fn actions_mut(&mut self) -> ActionsMut {
ActionsMut(self.items.iter_mut())
}
pub fn merge(&mut self, other: ItemSet) {
let mut present: HashSet<Item> = self.items
.iter()
.cloned()
.map(|mut item| {
item.action = None;
item
})
.collect();
for (index, item) in other.items.into_iter().enumerate() {
let mut item_na = item;
item_na.action = None;
if present.insert(item_na) {
if index < other.kernel {
self.items.insert(self.kernel, item);
self.kernel += 1;
} else {
self.items.push(item);
}
}
}
}
pub fn compress(&mut self) {
let mut present = HashSet::<Item>::new();
let items = replace(&mut self.items, Vec::new());
for (index, mut item) in items.into_iter().enumerate() {
if index < self.kernel {
self.items.push(item);
} else {
if item.is_shift() {
item.lookahead = grammar::NIL;
}
if present.insert(item) {
self.items.push(item);
}
}
}
}
}
impl<'a> fmt::Display for Pretty<&'a Grammar, &'a ItemSet> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
writeln!(f, "i{}:", self.item.id)?;
for (index, item) in self.item.items.iter().enumerate() {
if index > 0 {
write!(f, "\n")?;
}
write!(f, " {} {}", index, item.pretty(self.ctx))?;
if index < self.item.kernel {
write!(f, "*")?;
}
if let Some((symbol, action)) = item.action {
write!(f, " ({}, {})", symbol.pretty(self.ctx), action)?;
}
}
if self.item.items.is_empty() {
write!(f, " <empty>")?;
}
Ok(())
}
}
impl Item {
pub fn rule(&self) -> RuleId {
self.rule
}
pub fn lookahead(&self) -> TerminalId {
self.lookahead
}
pub fn marker(&self) -> usize {
self.marker
}
pub fn action(&self) -> Option<(Symbol, Action)> {
self.action
}
pub fn set_action(&mut self, action: Option<(Symbol, Action)>) {
self.action = action
}
pub fn pretty<'a>(&'a self, grammar: &'a Grammar) -> Pretty<&'a Grammar, &'a Self> {
Pretty::new(grammar, self)
}
pub fn is_shift(&self) -> bool {
self.action
.map(|(_, action)| action.is_shift())
.unwrap_or(false)
}
pub fn is_reduce(&self) -> bool {
self.action
.map(|(_, action)| action.is_reduce())
.unwrap_or(false)
}
pub fn has_action(&self) -> bool {
self.action.is_some()
}
}
impl<'a> fmt::Display for Pretty<&'a Grammar, &'a Item> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
if self.item.rule == grammar::ACCEPT {
write!(f, "[$accept ->")?;
if self.item.marker == 0 {
write!(f, " .")?;
}
write!(f, " {}", NonterminalId::from_usize(0).pretty(self.ctx))?;
if self.item.marker == 1 {
write!(f, " .")?;
}
} else {
let rule = self.ctx.rule(self.item.rule);
write!(f, "[{} ->", rule.name().pretty(self.ctx))?;
let symbols = rule.symbols();
for symbol in &symbols[0..self.item.marker] {
write!(f, " {}", symbol.pretty(self.ctx))?;
}
write!(f, " .")?;
for symbol in &symbols[self.item.marker..] {
write!(f, " {}", symbol.pretty(self.ctx))?;
}
}
write!(f, ", {}]", self.item.lookahead.pretty(self.ctx))?;
Ok(())
}
}
impl Action {
pub fn is_shift(&self) -> bool {
match *self {
Action::Shift(_) => true,
_ => false,
}
}
pub fn is_reduce(&self) -> bool {
match *self {
Action::Reduce(_) => true,
_ => false,
}
}
}
impl fmt::Display for Action {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
Action::Shift(id) => write!(f, "i{}", id),
Action::Reduce(grammar::ACCEPT) => write!(f, "$accept"),
Action::Reduce(id) => write!(f, "r{}", id.as_usize()),
}
}
}
impl ItemSetId {
pub fn from_usize(id: usize) -> ItemSetId {
ItemSetId(id)
}
pub fn as_usize(self) -> usize {
self.0
}
}
impl fmt::Display for ItemSetId {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", self.0)
}
}
fn compute_closure(item_set: &mut ItemSet, grammar: &Grammar, first_sets: &FirstSets) {
let mut done: HashSet<Item> = item_set.items.iter().cloned().collect();
let mut tail = 0;
while item_set.items.len() > tail {
let item = item_set.items[tail];
tail += 1;
if item.rule == grammar::ACCEPT {
if item.marker == 0 {
for &rule_id in grammar.rules_for_nonterminal(NonterminalId::from_usize(0)) {
let new_item = Item {
rule: rule_id,
lookahead: item.lookahead,
marker: 0,
action: None,
};
if !done.contains(&new_item) {
done.insert(new_item);
item_set.items.push(new_item);
}
}
}
} else {
let symbols = grammar.rule(item.rule).symbols();
if item.marker == symbols.len() {
continue;
}
match symbols[item.marker] {
Symbol::Terminal(_) => (),
Symbol::Nonterminal(id) => {
let (mut follow_set, epsilon) =
compute_follow_set(&symbols[item.marker + 1..], grammar, first_sets);
if epsilon {
follow_set.insert(item.lookahead.as_usize());
}
for &rule_id in grammar.rules_for_nonterminal(id) {
for fs in &follow_set {
let new_item = Item {
rule: rule_id,
lookahead: TerminalId::from_usize(fs),
marker: 0,
action: None,
};
if !done.contains(&new_item) {
done.insert(new_item);
item_set.items.push(new_item);
}
}
}
}
}
}
}
}
fn compute_follow_set<'a, I>(
symbols: I,
grammar: &Grammar,
first_sets: &FirstSets,
) -> (BitSet, bool)
where
I: IntoIterator<Item = &'a Symbol>,
{
let mut set = BitSet::with_capacity(grammar.terminal_id_bound());
for symbol in symbols.into_iter() {
match *symbol {
Symbol::Terminal(id) => {
set.insert(id.as_usize());
return (set, false);
}
Symbol::Nonterminal(id) => {
let fs = first_sets.get(id);
set.union_with(&fs.symbols);
if !fs.has_epsilon {
return (set, false);
}
}
}
}
(set, true)
}
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct KernelCores(Vec<(RuleId, usize)>);
pub struct Actions<'a>(std::slice::Iter<'a, Item>);
pub struct ActionsMut<'a>(std::slice::IterMut<'a, Item>);
impl<'a> Iterator for Actions<'a> {
type Item = &'a (Symbol, Action);
fn next(&mut self) -> Option<Self::Item> {
while let Some(item) = self.0.next() {
if let Some(ref action) = item.action {
return Some(action);
}
}
None
}
}
impl<'a> Iterator for ActionsMut<'a> {
type Item = &'a mut (Symbol, Action);
fn next(&mut self) -> Option<Self::Item> {
while let Some(item) = self.0.next() {
if let Some(ref mut action) = item.action {
return Some(action);
}
}
None
}
}
#[cfg(test)]
mod tests {
use grammar::{Grammar, Rule, END};
use item_set::ItemSets;
#[test]
fn left_recursion() {
let mut g = Grammar::new();
let a = g.add_nonterminal("A");
let b = g.add_nonterminal("B");
let c = g.add_terminal("c");
g.add_rule(Rule::new(a, vec![a.into(), b.into()]));
g.add_rule(Rule::new(a, vec![b.into()]));
g.add_rule(Rule::new(b, vec![c.into()]));
let is = ItemSets::compute(&g);
println!("{}", is.pretty(&g));
let la_exp = vec![END, c];
for is in is.all()[2..].iter() {
let la_act: Vec<_> = is.items().iter().map(|i| i.lookahead).collect();
assert_eq!(la_act, la_exp);
}
}
}