use super::{item::LRItem, precedence_annotations::PrecedenceAnnotations, LALR};
use crate::{
grammar_data::GrammarData,
input::{ProdIdx, Symbol},
structures::{BitSet, Map, ReVisit, Set},
ItemIdx, StateIdx,
};
#[derive(Clone)]
pub(crate) struct LRState<Kind> {
pub(crate) items: Vec<LRItem>,
pub(crate) core_items_len: ItemIdx,
pub(crate) transitions: Map<Symbol, StateIdx<Kind>>,
pub(crate) accessing_symbol: Option<Symbol>,
pub(crate) uses_precedence: bool,
}
impl<Kind> std::fmt::Debug for LRState<Kind> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("LRState")
.field("items", &self.items)
.field("core_items_len", &self.core_items_len)
.field("transitions", &self.transitions)
.field("accessing_symbol", &self.accessing_symbol)
.field("uses_precedence", &self.uses_precedence)
.finish()
}
}
impl<Kind> LRState<Kind> {
pub(crate) fn all_items(&self) -> &[LRItem] {
&self.items
}
pub(crate) fn core_items(&self) -> &[LRItem] {
&self.items[..self.core_items_len as usize]
}
pub(crate) fn core_items_mut(&mut self) -> &mut [LRItem] {
&mut self.items[..self.core_items_len as usize]
}
}
impl LRState<LALR> {
pub(super) fn new(core_items: Vec<LRItem>, accessing_symbol: Option<Symbol>) -> Self {
Self {
core_items_len: if accessing_symbol.is_some() {
core_items.len() as ItemIdx
} else {
0
},
items: core_items,
transitions: Map::default(),
accessing_symbol,
uses_precedence: false,
}
}
pub(crate) fn closure(&mut self, grammar_data: &GrammarData) {
type NonCoreKey = (ProdIdx, PrecedenceAnnotations);
let mut used_precedence = false;
let mut to_visit: ReVisit<ItemIdx, crate::structures::FastHasher> = ReVisit::new();
let mut prod_indices: Map<NonCoreKey, ItemIdx> = Map::default();
for (index, item) in self.items.iter().enumerate() {
if item.index == 0 {
prod_indices.insert((item.prod_idx, item.annotations.clone()), index as ItemIdx);
}
to_visit.insert(index as ItemIdx);
}
while let Some(index) = to_visit.pop() {
let index = index as usize;
let mut forbidden_nodes = Set::default();
for key in self.items[index]
.derived(grammar_data, &mut used_precedence, &mut forbidden_nodes)
.collect::<Vec<_>>()
{
if let Some(&existing_index) = prod_indices.get(&key) {
self.items[index]
.directly_derives
.insert(existing_index as ItemIdx);
continue;
}
let (prod_idx, annotations) = key;
let new_index = self.items.len() as ItemIdx;
prod_indices.insert((prod_idx, annotations.clone()), new_index);
self.items[index]
.directly_derives
.insert(new_index as ItemIdx);
let mut new_item = LRItem::new(prod_idx, grammar_data);
new_item.annotations = annotations;
self.items.push(new_item);
to_visit.insert(new_index);
}
self.items[index].forbidden_nodes.extend(forbidden_nodes);
}
let mut final_indices = Vec::new();
for n in 0..self.items.len() {
final_indices.push(n);
}
for n in 0..self.items.len() {
for k in n + 1..self.items.len() {
let item_n = &self.items[n];
let item_k = &self.items[k];
if LRItem::order(item_k, item_n).is_le() {
self.items.swap(n, k);
final_indices.swap(n, k);
}
}
}
let final_indices = {
let mut indices = vec![0; final_indices.len()];
for (i, j) in final_indices.into_iter().enumerate() {
indices[j] = i as ItemIdx;
}
indices
};
for item in &mut self.items {
let mut new_derives = BitSet::new();
for index in item.directly_derives.iter() {
new_derives.insert(final_indices[index as usize]);
}
item.directly_derives = new_derives;
}
self.derives_closure();
self.uses_precedence = used_precedence;
}
fn derives_closure(&mut self) {
let mut closure = Vec::new();
let mut to_visit = Vec::new();
for (index, item) in self.items.iter().enumerate() {
closure.push(item.directly_derives.clone());
for derived in item.directly_derives.iter() {
to_visit.push((index as ItemIdx, derived));
}
}
while let Some((from, to)) = to_visit.pop() {
for new_to in self.items[to as usize].directly_derives.iter() {
if closure[from as usize].insert(new_to) {
to_visit.push((from, new_to));
}
}
}
for (item, new_derives) in self.items.iter_mut().zip(closure) {
item.derives = new_derives
}
}
pub(super) fn successors(
&mut self,
grammar_data: &GrammarData,
) -> impl Iterator<Item = (Symbol, Vec<LRItem>)> {
let mut successors: Map<_, Vec<_>> = Map::default();
for item in &mut self.items {
if let Some(symbol) = item.current_symbol(grammar_data) {
let list = successors.entry(symbol).or_default();
item.next_item_local_index = Some(list.len() as ItemIdx);
list.push(item.successor());
}
}
successors.into_iter()
}
pub(super) fn compatible(&self, core_items: &[LRItem]) -> bool {
if self.core_items_len != core_items.len() as ItemIdx {
return false;
}
for (self_item, item) in self.items.iter().zip(core_items) {
if self_item.index != item.index || self_item.prod_idx != item.prod_idx {
return false;
}
}
true
}
}
impl<Kind> std::ops::Index<StateIdx<Kind>> for [LRState<Kind>] {
type Output = LRState<Kind>;
fn index(&self, index: StateIdx<Kind>) -> &Self::Output {
&self[index.get() as usize]
}
}
impl<Kind> std::ops::Index<StateIdx<Kind>> for Vec<LRState<Kind>> {
type Output = LRState<Kind>;
fn index(&self, index: StateIdx<Kind>) -> &Self::Output {
&self[index.get() as usize]
}
}
impl<Kind> std::ops::IndexMut<StateIdx<Kind>> for Vec<LRState<Kind>> {
fn index_mut(&mut self, index: StateIdx<Kind>) -> &mut Self::Output {
&mut self[index.get() as usize]
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::{
input::{
Grammar, Node,
Symbol::{Node as N, Token as T},
},
lr::precedence_annotations::PrecedenceAnnotations,
test_common::{
expressions,
nodes::{E, N1, N2, N3, N4, START},
tokens::{T1, T2, T3, T4},
},
};
impl LRItem {
fn with_derives(mut self, derives: &[ItemIdx]) -> Self {
self.derives = derives.iter().copied().collect();
self
}
fn with_direct_derives(mut self, derives: &[ItemIdx]) -> Self {
self.directly_derives = derives.iter().copied().collect();
self
}
fn with_annotations(mut self, annotations: PrecedenceAnnotations) -> Self {
self.annotations = annotations;
self
}
fn with_forbidden_nodes(mut self, nodes: &[Node]) -> Self {
self.forbidden_nodes = nodes.iter().copied().collect();
self
}
}
#[test]
fn closure_empty_state() {
let grammar = Grammar::new();
let grammar_data = GrammarData::new(&grammar).unwrap();
let mut empty_state = LRState::new(Vec::new(), None);
empty_state.closure(&grammar_data);
assert!(empty_state.items.is_empty());
}
#[test]
fn closure_without_annotations() {
let mut grammar = Grammar::new();
let start_prod = grammar
.add_production(START, vec![T(T1), N(N1), N(N2)])
.unwrap();
let n1_prod_1 = grammar.add_production(N1, vec![N(N2), T(T1)]).unwrap();
let n1_prod_2 = grammar.add_production(N1, vec![N(N3), T(T2)]).unwrap();
let n2_prod_1 = grammar.add_production(N2, vec![N(N4), N(N2)]).unwrap();
let n2_prod_2 = grammar.add_production(N2, vec![]).unwrap();
let n3_prod = grammar.add_production(N3, vec![T(T3)]).unwrap();
let n4_prod = grammar.add_production(N4, vec![T(T3)]).unwrap();
let grammar_data = GrammarData::new(&grammar).unwrap();
let core_item = LRItem::new(start_prod, &grammar_data).successor();
let mut state = LRState::new(vec![core_item.clone()], Some(Symbol::Token(T1)));
state.closure(&grammar_data);
let expected_items = [
core_item
.with_direct_derives(&[1, 2])
.with_derives(&[1, 2, 3, 4, 5, 6]),
LRItem::new(n1_prod_1, &grammar_data)
.with_direct_derives(&[3, 4])
.with_derives(&[3, 4, 6]),
LRItem::new(n1_prod_2, &grammar_data)
.with_direct_derives(&[5])
.with_derives(&[5]),
LRItem::new(n2_prod_1, &grammar_data)
.with_direct_derives(&[6])
.with_derives(&[6]),
LRItem::new(n2_prod_2, &grammar_data),
LRItem::new(n3_prod, &grammar_data),
LRItem::new(n4_prod, &grammar_data),
];
assert_eq!(expected_items.len(), state.items.len());
for (i, (expected, got)) in expected_items.iter().zip(&state.items).enumerate() {
assert_eq!(expected, got, "mismatch at index {i}")
}
}
#[test]
fn closure_with_annotations() {
use expressions::symbols::{E, PLUS, START, TIMES};
let (mut grammar, [add_prod, sub_prod, mul_prod, neg_prod, if_prod]) =
expressions::get_grammar_with_precedence();
let start_prod = grammar.add_production(START, vec![T(PLUS), N(E)]).unwrap();
let grammar_data = GrammarData::new(&grammar).unwrap();
let core_item = LRItem::new(start_prod, &grammar_data).successor();
let mut state = LRState::new(vec![core_item.clone()], Some(Symbol::Token(T1)));
state.closure(&grammar_data);
let expected_items = [
core_item
.with_direct_derives(&[1, 2, 3, 4, 5])
.with_derives(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]),
LRItem::new(add_prod, &grammar_data)
.with_direct_derives(&[6, 7, 8, 9])
.with_derives(&[6, 7, 8, 9, 10, 11]),
LRItem::new(sub_prod, &grammar_data)
.with_direct_derives(&[6, 7, 8, 9])
.with_derives(&[6, 7, 8, 9, 10, 11]),
LRItem::new(mul_prod, &grammar_data)
.with_direct_derives(&[10, 11])
.with_derives(&[10, 11]),
LRItem::new(neg_prod, &grammar_data),
LRItem::new(if_prod, &grammar_data),
LRItem::new(add_prod, &grammar_data)
.with_direct_derives(&[6, 7, 8, 9])
.with_derives(&[6, 7, 8, 9, 10, 11])
.with_annotations(PrecedenceAnnotations {
forbidden_left: Vec::new(),
forbidden_right: vec![Some(2)],
}),
LRItem::new(sub_prod, &grammar_data)
.with_direct_derives(&[6, 7, 8, 9])
.with_derives(&[6, 7, 8, 9, 10, 11])
.with_annotations(PrecedenceAnnotations {
forbidden_left: Vec::new(),
forbidden_right: vec![Some(2)],
}),
LRItem::new(mul_prod, &grammar_data)
.with_direct_derives(&[10, 11])
.with_derives(&[10, 11])
.with_annotations(PrecedenceAnnotations {
forbidden_left: Vec::new(),
forbidden_right: vec![Some(2)],
}),
LRItem::new(neg_prod, &grammar_data).with_annotations(PrecedenceAnnotations {
forbidden_left: Vec::new(),
forbidden_right: vec![Some(2)],
}),
LRItem::new(mul_prod, &grammar_data)
.with_direct_derives(&[10, 11])
.with_derives(&[10, 11])
.with_annotations(PrecedenceAnnotations {
forbidden_left: Vec::new(),
forbidden_right: vec![Some(4)],
}),
LRItem::new(neg_prod, &grammar_data).with_annotations(PrecedenceAnnotations {
forbidden_left: Vec::new(),
forbidden_right: vec![Some(4)],
}),
];
assert_eq!(expected_items.len(), state.items.len());
for (i, (expected, got)) in expected_items.iter().zip(&state.items).enumerate() {
assert_eq!(expected, got, "mismatch at index {i}")
}
let core_item1 = state.items[1].clone().successor().successor();
let core_item2 = state.items[6].clone().successor().successor();
let mut state = LRState::new(
vec![core_item1.clone(), core_item2.clone()],
Some(Symbol::Token(PLUS)),
);
state.closure(&grammar_data);
let expected_items = [
core_item1
.with_direct_derives(&[2, 3, 6])
.with_derives(&[2, 3, 5, 6, 8]),
core_item2
.with_direct_derives(&[4, 7])
.with_derives(&[4, 5, 7, 8]),
LRItem::new(neg_prod, &grammar_data),
LRItem::new(if_prod, &grammar_data),
LRItem::new(neg_prod, &grammar_data).with_annotations(PrecedenceAnnotations {
forbidden_left: Vec::new(),
forbidden_right: vec![Some(2)],
}),
LRItem::new(neg_prod, &grammar_data).with_annotations(PrecedenceAnnotations {
forbidden_left: Vec::new(),
forbidden_right: vec![Some(4)],
}),
LRItem::new(mul_prod, &grammar_data)
.with_direct_derives(&[5, 8])
.with_derives(&[5, 8])
.with_annotations(PrecedenceAnnotations {
forbidden_left: vec![Some(3)],
forbidden_right: Vec::new(),
}),
LRItem::new(mul_prod, &grammar_data)
.with_direct_derives(&[5, 8])
.with_derives(&[5, 8])
.with_annotations(PrecedenceAnnotations {
forbidden_left: vec![Some(3)],
forbidden_right: vec![Some(2)],
}),
LRItem::new(mul_prod, &grammar_data)
.with_direct_derives(&[5, 8])
.with_derives(&[5, 8])
.with_annotations(PrecedenceAnnotations {
forbidden_left: vec![Some(3)],
forbidden_right: vec![Some(4)],
}),
];
assert_eq!(expected_items.len(), state.items.len());
for (i, (expected, got)) in expected_items.iter().zip(&state.items).enumerate() {
assert_eq!(expected, got, "mismatch at index {i}")
}
let core_item = LRItem::new(mul_prod, &grammar_data).successor().successor();
let mut state = LRState::new(vec![core_item.clone()], Some(Symbol::Token(TIMES)));
state.closure(&grammar_data);
let expected_items = [
core_item.with_direct_derives(&[1, 2]).with_derives(&[1, 2]),
LRItem::new(neg_prod, &grammar_data),
LRItem::new(if_prod, &grammar_data),
];
assert_eq!(expected_items.len(), state.items.len());
for (i, (expected, got)) in expected_items.iter().zip(&state.items).enumerate() {
assert_eq!(expected, got, "mismatch at index {i}")
}
}
#[test]
fn closure_without_annotations_start_state() {
let mut grammar = Grammar::new();
let start_prod = grammar.add_production(START, vec![N(N1), N(N2)]).unwrap();
let n1_prod_1 = grammar.add_production(N1, vec![N(N2), T(T1)]).unwrap();
let n1_prod_2 = grammar.add_production(N1, vec![N(N3), T(T2)]).unwrap();
let n2_prod_1 = grammar.add_production(N2, vec![N(N4), N(N2)]).unwrap();
let n2_prod_2 = grammar.add_production(N2, vec![N(START), T(T4)]).unwrap();
let n2_prod_3 = grammar.add_production(N2, vec![]).unwrap();
let n3_prod = grammar.add_production(N3, vec![T(T3)]).unwrap();
let n4_prod = grammar.add_production(N4, vec![T(T3)]).unwrap();
let grammar_data = GrammarData::new(&grammar).unwrap();
let core_item = LRItem::new(start_prod, &grammar_data);
let mut state = LRState::new(vec![core_item.clone()], None);
state.closure(&grammar_data);
let expected_items = [
core_item
.with_direct_derives(&[1, 2])
.with_derives(&[0, 1, 2, 3, 4, 5, 6, 7]),
LRItem::new(n1_prod_1, &grammar_data)
.with_direct_derives(&[3, 4, 5])
.with_derives(&[0, 1, 2, 3, 4, 5, 6, 7]),
LRItem::new(n1_prod_2, &grammar_data)
.with_direct_derives(&[6])
.with_derives(&[6]),
LRItem::new(n2_prod_1, &grammar_data)
.with_direct_derives(&[7])
.with_derives(&[7]),
LRItem::new(n2_prod_2, &grammar_data)
.with_direct_derives(&[0])
.with_derives(&[0, 1, 2, 3, 4, 5, 6, 7]),
LRItem::new(n2_prod_3, &grammar_data),
LRItem::new(n3_prod, &grammar_data),
LRItem::new(n4_prod, &grammar_data),
];
assert_eq!(expected_items.len(), state.items.len());
for (i, (expected, got)) in expected_items.iter().zip(&state.items).enumerate() {
assert_eq!(expected, got, "mismatch at index {i}")
}
}
#[test]
fn closure_with_annotations_start_state() {
let (grammar, [add_prod, sub_prod, mul_prod, neg_prod, _]) =
expressions::get_grammar_with_precedence();
let grammar_data = GrammarData::new(&grammar).unwrap();
let core_item = LRItem::new(add_prod, &grammar_data);
let mut state = LRState::new(vec![core_item], None);
state.closure(&grammar_data);
let expected_items = [
LRItem::new(add_prod, &grammar_data)
.with_direct_derives(&[1, 2, 3, 4])
.with_derives(&[1, 2, 3, 4, 5, 6])
.with_annotations(PrecedenceAnnotations::default()),
LRItem::new(add_prod, &grammar_data)
.with_direct_derives(&[1, 2, 3, 4])
.with_derives(&[1, 2, 3, 4, 5, 6])
.with_annotations(PrecedenceAnnotations {
forbidden_left: Vec::new(),
forbidden_right: vec![Some(2)],
}),
LRItem::new(sub_prod, &grammar_data)
.with_direct_derives(&[1, 2, 3, 4])
.with_derives(&[1, 2, 3, 4, 5, 6])
.with_annotations(PrecedenceAnnotations {
forbidden_left: Vec::new(),
forbidden_right: vec![Some(2)],
}),
LRItem::new(mul_prod, &grammar_data)
.with_direct_derives(&[5, 6])
.with_derives(&[5, 6])
.with_annotations(PrecedenceAnnotations {
forbidden_left: Vec::new(),
forbidden_right: vec![Some(2)],
}),
LRItem::new(neg_prod, &grammar_data).with_annotations(PrecedenceAnnotations {
forbidden_left: Vec::new(),
forbidden_right: vec![Some(2)],
}),
LRItem::new(mul_prod, &grammar_data)
.with_direct_derives(&[5, 6])
.with_derives(&[5, 6])
.with_annotations(PrecedenceAnnotations {
forbidden_left: Vec::new(),
forbidden_right: vec![Some(4)],
}),
LRItem::new(neg_prod, &grammar_data).with_annotations(PrecedenceAnnotations {
forbidden_left: Vec::new(),
forbidden_right: vec![Some(4)],
}),
];
assert_eq!(expected_items.len(), state.items.len());
for (i, (expected, got)) in expected_items.iter().zip(&state.items).enumerate() {
assert_eq!(expected, got, "mismatch at index {i}")
}
}
#[test]
fn forbid_derivations() {
use crate::test_common::tokens::{EQUAL, INT};
let mut grammar = Grammar::new();
let start_prod = grammar.add_production(START, vec![N(E)]).unwrap();
let compare_prod = grammar
.add_production(E, vec![N(E), T(EQUAL), N(E)])
.unwrap();
let int_prod = grammar.add_production(E, vec![T(INT)]).unwrap();
grammar
.get_production_mut(compare_prod)
.unwrap()
.forbid_derivation(0, compare_prod)
.unwrap()
.forbid_derivation(2, compare_prod)
.unwrap();
let grammar_data = GrammarData::new(&grammar).unwrap();
let core_item = LRItem::new(start_prod, &grammar_data);
let mut state = LRState::new(vec![core_item.clone()], None);
state.closure(&grammar_data);
let expected_items = [
core_item.with_direct_derives(&[1, 2]).with_derives(&[1, 2]),
LRItem::new(compare_prod, &grammar_data)
.with_direct_derives(&[2])
.with_derives(&[2])
.with_forbidden_nodes(&[E]),
LRItem::new(int_prod, &grammar_data),
];
assert_eq!(expected_items.len(), state.items.len());
for (i, (expected, got)) in expected_items.iter().zip(&state.items).enumerate() {
assert_eq!(expected, got, "mismatch at index {i}")
}
let core_item = LRItem::new(compare_prod, &grammar_data)
.successor()
.successor();
let mut state = LRState::new(vec![core_item.clone()], Some(Symbol::Token(EQUAL)));
state.closure(&grammar_data);
let expected_items = [
core_item
.with_direct_derives(&[1])
.with_derives(&[1])
.with_forbidden_nodes(&[E]),
LRItem::new(int_prod, &grammar_data),
];
assert_eq!(expected_items.len(), state.items.len());
for (i, (expected, got)) in expected_items.iter().zip(&state.items).enumerate() {
assert_eq!(expected, got, "mismatch at index {i}")
}
}
}