ielr/ielr.rs
1/*
2 * Copyright 2022 Arnaud Golfouse
3 *
4 * This Source Code Form is subject to the terms of the Mozilla Public
5 * License, v. 2.0. If a copy of the MPL was not distributed with this
6 * file, You can obtain one at https://mozilla.org/MPL/2.0/.
7 */
8
9//! Implementation of the structures and algorithms specific to IELR(1).
10
11mod item_relation;
12mod phase0;
13mod phase1;
14mod phase2;
15mod phase3;
16mod phase4;
17
18pub(crate) use self::{
19 phase0::Phase0, phase1::Phase1, phase2::Phase2, phase3::Phase3, phase4::Phase4,
20};
21
22#[cfg(test)]
23mod tests {
24 use crate::{
25 input::{
26 Grammar, Node, Symbol,
27 Symbol::{Node as N, Token as T},
28 },
29 lr::{LRState, LRTable, LALR},
30 GotoIdx, StateIdx,
31 };
32
33 pub(super) use crate::test_common::{
34 nodes::{N1, N2, N3, N4, N5, N6, N7, N8, START},
35 tokens::{T1, T2, T3, T4, T5},
36 };
37
38 pub(super) fn get_state<'a, Kind>(
39 table: &'a LRTable<Kind>,
40 path_from_start: &[Symbol],
41 ) -> Option<(StateIdx<Kind>, &'a LRState<Kind>)> {
42 let mut state_index = *table.starting_states.get(&START)?;
43 let mut state = table.get_state(state_index)?;
44 for symbol in path_from_start {
45 state_index = *state.transitions.get(symbol)?;
46 state = table.get_state(state_index)?;
47 }
48 Some((state_index, state))
49 }
50
51 /// Get a [`GotoIdx`] `g`, given
52 /// - `state_index`: the value of `goto_data.from_state[g]`.
53 /// - `node`: the value of `goto_data.symbol[g]`.
54 pub(super) fn get_goto(
55 goto_data: &GotoIndicesData<LALR>,
56 state_index: StateIdx<LALR>,
57 node: Node,
58 ) -> Option<GotoIdx> {
59 for ((&s, &index_node), g) in goto_data
60 .from_state
61 .iter()
62 .zip(&goto_data.symbol)
63 .zip(goto_data.get_all_goto_indices())
64 {
65 if s == state_index && index_node == node {
66 return Some(g);
67 }
68 }
69 None
70 }
71
72 /// Quickly declare all non-core items in a table.
73 ///
74 /// This will assert that these are all unique, and that we don't miss any.
75 macro_rules! get_gotos {
76 (in $table:expr;
77 $(
78 $($from_state:ident => $path_symbol:expr)? => $state_name:ident {
79 $($node:ident: $goto:ident),* $(,)?
80 };
81 )*
82 ) => {
83 use std::collections::HashMap;
84 let (goto_data, _) = $crate::ielr::Phase0::compute_goto_idx_tables(&$table);
85 let mut len = 0;
86 let mut state_indices = HashMap::new();
87 $(
88 let (state_index, state) = get_gotos!(@if_empty $($from_state)? {
89 $crate::ielr::tests::get_state(&$table, &[]).unwrap()
90 } else {
91 $(
92 let state_index = $table
93 .get_state(state_indices[stringify!($from_state)])
94 .unwrap()
95 .transitions[&$path_symbol];
96 let state = $table.get_state(state_index).unwrap();
97 (state_index, state)
98 )?
99 });
100 state_indices.insert(stringify!($state_name), state_index);
101
102 $(
103 let $goto = if state.uses_precedence {
104 unreachable!("this macro cannot handle grammars with precedence annotations")
105 } else {
106 $crate::ielr::tests::get_goto(&goto_data, state_index, $node).unwrap()
107 };
108 len += 1;
109 )*
110 // no missing goto
111 let all_nodes = [$($node),*];
112 assert!(
113 state
114 .all_items()
115 .iter()
116 .all(|item| item.index > 0 || all_nodes.contains(&item.prod_idx.lhs))
117 );
118 )*
119 // all gotos are unique
120 assert_eq!([$($($goto,)*)*].into_iter().collect::<$crate::structures::Set<$crate::GotoIdx>>().len(), len);
121 };
122 (@if_empty { $($then:tt)* } else { $($otherwise:tt)* }) => {
123 {$($then)*}
124 };
125 (@if_empty $x:tt { $($then:tt)* } else { $($otherwise:tt)* }) => {
126 {$($otherwise)*}
127 };
128 }
129
130 pub(super) use get_gotos;
131
132 use super::phase0::GotoIndicesData;
133
134 /// ```text
135 /// START → T1 N1 N2 N6
136 /// START → T2 N4
137 /// N1 → T2
138 /// N2 → N3
139 /// N3 →
140 /// N3 → T3
141 /// N4 → N5 N3
142 /// N5 → T4
143 /// N6 → T1 T2
144 /// N6 → T4
145 /// ```
146 ///
147 /// The LALR state machine is: (order of states may change)
148 /// ```text
149 /// 0: START →∙T1 N1 N2 N6
150 /// START →∙T2 N4
151 /// 1: START → T1∙N1 N2 N6
152 /// N1 →∙T2
153 /// 2: START → T2∙N4
154 /// N4 →∙N5 N3
155 /// N5 →∙T4
156 /// 3: START → T1 N1∙N2 N6
157 /// N2 →∙N3
158 /// N3 →∙
159 /// N3 →∙T3
160 /// 4: N1 → T2∙
161 /// 5: START → T2 N4∙
162 /// 6: N4 → N5∙N3
163 /// N3 →∙
164 /// N3 →∙T3
165 /// 7: N5 → T4∙
166 /// 8: START → T1 N1 N2∙N6
167 /// N6 →∙T1 T2
168 /// N6 →∙T4
169 /// 9: N2 → N3∙
170 /// 10: N3 → T3∙
171 /// 11: N4 → N5 N3∙
172 /// 12: START → T1 N1 N2 N6∙
173 /// 13: N6 → T1∙T2
174 /// 14: N6 → T4∙
175 /// 15: N6 → T1 T2∙
176 /// ```
177 pub(super) fn grammar_1() -> Grammar {
178 let mut grammar = Grammar::new();
179 for (lhs, rhs) in [
180 (START, vec![T(T1), N(N1), N(N2), N(N6)]),
181 (START, vec![T(T2), N(N4)]),
182 (N1, vec![T(T2)]),
183 (N2, vec![N(N3)]),
184 (N3, vec![]),
185 (N3, vec![T(T3)]),
186 (N4, vec![N(N5), N(N3)]),
187 (N5, vec![T(T4)]),
188 (N6, vec![T(T1), T(T2)]),
189 (N6, vec![T(T4)]),
190 ] {
191 grammar.add_production(lhs, rhs).unwrap();
192 }
193
194 grammar
195 }
196
197 /// ```text
198 /// START → T1 N1 N2 N6
199 /// N1 → N3
200 /// N2 → T2 N4 N5
201 /// N2 → T2 N6
202 /// N3 →
203 /// N3 → N7
204 /// N4 → T1
205 /// N5 →
206 /// N5 → T5
207 /// N6 → N7
208 /// N7 → N8 N3
209 /// N7 → T3
210 /// N8 → T1
211 /// ```
212 ///
213 /// The LALR state machine is: (order of states may change)
214 /// ```text
215 /// 0: START →∙T1 N1 N2 N6 i0 { EOF }
216 /// 1: START → T1∙N1 N2 N6 { EOF }
217 /// N1 →∙N3 i1_1 { T2 }
218 /// N3 →∙ i1_2 { T2 }
219 /// N3 →∙N7 i1_3 { T2 }
220 /// N7 →∙N8 N3 i1_4 { T2 }
221 /// N7 →∙T3 i1_5 { T2 }
222 /// N8 →∙T1 i1_6 { T1, T2, T3 }
223 /// 2: START → T1 N1∙N2 N6 { EOF }
224 /// N2 →∙T2 N4 N5 i2_1 { T1, T3 }
225 /// N2 →∙T2 N6 i2_2 { T1, T3 }
226 /// 3: START → T1 N1 N2∙N6 { EOF }
227 /// N6 →∙N7 i3_1 { EOF }
228 /// N7 →∙N8 N3 i3_2 { EOF }
229 /// N7 →∙T3 i3_3 { EOF }
230 /// N8 →∙T1 i3_4 { EOF, T1, T2, T3 }
231 /// 4: START → T1 N1 N2 N6∙ { EOF }
232 /// 5: N1 → N3∙ { T2 }
233 /// 6: N3 → N7∙ { EOF, T1, T2, T3 }
234 /// 7: N7 → N8∙N3 { EOF, T1, T2, T3 }
235 /// N3 →∙ i7_1 { EOF, T1, T2, T3 }
236 /// N3 →∙N7 i7_2 { EOF, T1, T2, T3 }
237 /// N7 →∙N8 N3 i7_3 { EOF, T1, T2, T3 }
238 /// N7 →∙T3 i7_4 { EOF, T1, T2, T3 }
239 /// N8 →∙T1 i7_5 { EOF, T1, T2, T3 }
240 /// 8: N7 → T3∙ { EOF, T1, T2, T3 }
241 /// 9: N2 → T2∙N4 N5 { T1, T3 }
242 /// N2 → T2∙N6 { T1, T3 }
243 /// N4 →∙T1 i9_1 { T1, T3, T5 }
244 /// N6 →∙N7 i9_2 { T1, T3 }
245 /// N7 →∙N8 N3 i9_3 { T1, T3 }
246 /// N7 →∙T3 i9_4 { T1, T3 }
247 /// N8 →∙T1 i9_5 { T1, T3 }
248 /// 10: N2 → T2 N4∙N5 { T1, T3 }
249 /// N5 →∙ i10_1 { T1, T3 }
250 /// N5 →∙T5 i10_2 { T1, T3 }
251 /// 11: N2 → T2 N4 N5∙ { T1, T3 }
252 /// 12: N2 → T2 N6∙ { T1, T3 }
253 /// 13: N6 → N7∙ { EOF, T1, T3 }
254 /// 14: N8 → T1∙ { EOF, T1, T2, T3 }
255 /// 15: N7 → N8 N3∙ { EOF, T1, T2, T3 }
256 /// 16: N4 → T1∙ { T1, T3, T5 }
257 /// N8 → T1∙ { T1, T3 }
258 /// 17: N5 → T5∙ { T1, T3 }
259 /// 18: END STATE
260 /// ```
261 pub(super) fn grammar_2() -> Grammar {
262 let mut grammar = Grammar::new();
263 for (lhs, rhs) in [
264 (START, vec![T(T1), N(N1), N(N2), N(N6)]),
265 (N1, vec![N(N3)]),
266 (N2, vec![T(T2), N(N4), N(N5)]),
267 (N2, vec![T(T2), N(N6)]),
268 (N3, vec![]),
269 (N3, vec![N(N7)]),
270 (N4, vec![T(T1)]),
271 (N5, vec![]),
272 (N5, vec![T(T5)]),
273 (N6, vec![N(N7)]),
274 (N7, vec![N(N8), N(N3)]),
275 (N7, vec![T(T3)]),
276 (N8, vec![T(T1)]),
277 ] {
278 grammar.add_production(lhs, rhs).unwrap();
279 }
280
281 grammar
282 }
283
284 /// ```text
285 /// START → T1 T2 N1
286 /// START → T1 T2 N2
287 /// N1 → T3
288 /// N2 → T3
289 /// ```
290 ///
291 /// The LALR state machine is: (order of states may change)
292 /// ```text
293 /// 0: START →∙T1 T2 N1 { EOF }
294 /// START →∙T1 T2 N2 { EOF }
295 /// 1: START → T1∙T2 N1 { EOF }
296 /// START → T1∙T2 N2 { EOF }
297 /// 2: START → T1 T2∙N1 { EOF }
298 /// START → T1 T2∙N2 { EOF }
299 /// N1 →∙T3 { EOF }
300 /// N2 →∙T3 { EOF }
301 /// 3: N1 → T3∙ { EOF }
302 /// N2 → T3∙ { EOF }
303 /// 4: START → T1 T2 N1∙ { EOF }
304 /// 5: START → T1 T2 N2∙ { EOF }
305 /// 6: END STATE
306 /// ```
307 pub(super) fn grammar_3() -> Grammar {
308 let mut grammar = Grammar::new();
309 for (lhs, rhs) in [
310 (START, vec![T(T1), T(T2), N(N1)]),
311 (START, vec![T(T1), T(T2), N(N2)]),
312 (N1, vec![T(T3)]),
313 (N2, vec![T(T3)]),
314 ] {
315 grammar.add_production(lhs, rhs).unwrap();
316 }
317
318 grammar
319 }
320}