grammar_utils/lr1/state.rs
1use crate::*;
2use super::*;
3
4/// An LR(1) state.
5/// Consists of an LR(1) itemset.
6#[derive(Clone)]
7pub struct State<'g> {
8 grammar: &'g Grammar,
9 items: Vec<Item<'g>>,
10}
11
12// TODO contents should be private
13/// The index of a given state.
14#[derive(Debug)]
15#[derive(Clone, Copy, Eq, PartialEq, Hash, Ord, PartialOrd)]
16pub struct StateIndex(pub usize);
17
18impl From<StateIndex> for usize {
19 fn from(value: StateIndex) -> Self {
20 value.0
21 }
22}
23
24impl<'g> std::fmt::Debug for State<'g> {
25 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
26 for item in self.items() {
27 writeln!(f, "{item:?}")?;
28 }
29 Ok(())
30 }
31}
32
33impl<'g> PartialEq for State<'g> {
34 fn eq(&self, other: &Self) -> bool {
35 std::ptr::eq(self.grammar, other.grammar) && self.items == other.items
36 }
37}
38
39impl<'g> Eq for State<'g> {}
40
41impl<'g> State<'g> {
42 /// Get the underlying `Grammar` for this state.
43 pub fn grammar(&self) -> &'g Grammar {
44 self.grammar
45 }
46
47 /// Get the itemset for this state.
48 pub fn items(&self) -> &[Item<'g>] {
49 self.items.as_slice()
50 }
51
52 /// Generates the state representing the closure of a single item.
53 pub(crate) fn singleton(item: Item<'g>, analysis: &GrammarAnalysis<'g>) -> Self {
54 let grammar: &'g Grammar = item.grammar();
55 let itemset = State {
56 grammar,
57 items: vec![item],
58 };
59 itemset.closure(analysis)
60 }
61
62 /// Calculate the ε-closure of the items in this state.
63 ///
64 /// This captures the fact that when the item is ready to accept a nonterminal,
65 /// it is equivalently ready to begin parsing that nonterminal
66 /// using of the rules in the grammar.
67 pub(crate) fn closure(&self, analysis: &GrammarAnalysis<'g>) -> State<'g> {
68 let mut itemset = self.items.clone();
69
70 // Iterate repeatedly until no new items are found.
71 // (That is, until `dirty` stays `false`.
72 loop {
73 let mut dirty = false;
74 // We're about to iterate over itemset, so we create a buffer
75 // to avoid iterator invalidation.
76 let mut new_items = vec![];
77
78 for item in &itemset {
79 // If we're not at the end of the rule...
80 if let Some(next_symbol) = item.next_symbol() {
81 // Calculate the look ahead.
82 //
83 // This step is done for LR(1) but not for LR(0).
84 //
85 // The lookahead is based on the symbol *following* the symbol at the cursor.
86 // If it's a nonterminal, we take its FIRST set as the lookahead.
87 // If it's a terminal, that becomes the (only) lookahead symbol.
88 //
89 // In the case the cursor points to the last symbol in the rule,
90 // we keep the same lookahead.
91 let lookahead = if let Some(next_next_symbol) = item.next_next_symbol() {
92 if next_next_symbol.is_nonterminal() {
93 analysis.first(next_next_symbol).into_iter().map(|symbol| Some(symbol)).collect()
94 } else {
95 [Some(next_next_symbol)].into_iter().collect()
96 }
97 } else {
98 item.lookahead().clone()
99 };
100
101 // If the cursor points at a nonterminal,
102 // find all of the rules for that nonterminal and add them
103 // (if they aren't already in the itemset).
104 //
105 // Adding this rule may enable even more items in the next iteration.
106 // Set `dirty` to `true` to indicate we need to iterate again.
107 if next_symbol.is_nonterminal() {
108 let symbol_rules = self.grammar
109 .rules()
110 .into_iter()
111 .filter(|rule| {
112 rule.lhs() == next_symbol
113 });
114
115 for rule in symbol_rules {
116 let item = Item::new(rule, 0, lookahead.clone());
117 if !itemset.contains(&item) {
118 new_items.push(item);
119 dirty = true;
120 }
121 }
122 }
123 }
124
125 }
126
127 // Now that we're done iterating, we can safely copy the items into the itemset.
128 for item in new_items {
129 if !itemset.contains(&item) {
130 itemset.push(item);
131 }
132 }
133
134 // And if we iterated without changing anything,
135 // then we're done.
136 if !dirty {
137 break;
138 }
139 }
140
141 State {
142 grammar: self.grammar,
143 items: itemset,
144 }
145 }
146
147 // Take the current state and calculate which state is reached
148 // when it shifts `symbol` onto the stack.
149 //
150 // This has the effect of going through each item and advancing the cursor
151 // for any item where the symbol at the cursor is `symbol`.
152 // For any item where the symbol at the cursor is something else,
153 // or in the case that the cursor is at the end,
154 // the item is instead discarded.
155 //
156 // The result is then the ε-closure of the resulting itemset.
157 pub fn follow(&self, analysis: &GrammarAnalysis<'g>, symbol: Symbol<'g>) -> State<'g> {
158 let mut items = vec![];
159 for item in &self.items {
160 if let Some(next_symbol) = item.next_symbol() {
161 if next_symbol == symbol {
162 items.push(item.step().unwrap());
163 }
164 }
165 }
166
167 let itemset = State {
168 grammar: self.grammar,
169 items,
170 };
171
172 itemset.closure(analysis)
173 }
174}