ielr/
ielr.rs

1/*
2 * Copyright 2022 Arnaud Golfouse
3 *
4 * This Source Code Form is subject to the terms of the Mozilla Public
5 * License, v. 2.0. If a copy of the MPL was not distributed with this
6 * file, You can obtain one at https://mozilla.org/MPL/2.0/.
7 */
8
9//! Implementation of the structures and algorithms specific to IELR(1).
10
11mod item_relation;
12mod phase0;
13mod phase1;
14mod phase2;
15mod phase3;
16mod phase4;
17
18pub(crate) use self::{
19    phase0::Phase0, phase1::Phase1, phase2::Phase2, phase3::Phase3, phase4::Phase4,
20};
21
22#[cfg(test)]
23mod tests {
24    use crate::{
25        input::{
26            Grammar, Node, Symbol,
27            Symbol::{Node as N, Token as T},
28        },
29        lr::{LRState, LRTable, LALR},
30        GotoIdx, StateIdx,
31    };
32
33    pub(super) use crate::test_common::{
34        nodes::{N1, N2, N3, N4, N5, N6, N7, N8, START},
35        tokens::{T1, T2, T3, T4, T5},
36    };
37
38    pub(super) fn get_state<'a, Kind>(
39        table: &'a LRTable<Kind>,
40        path_from_start: &[Symbol],
41    ) -> Option<(StateIdx<Kind>, &'a LRState<Kind>)> {
42        let mut state_index = *table.starting_states.get(&START)?;
43        let mut state = table.get_state(state_index)?;
44        for symbol in path_from_start {
45            state_index = *state.transitions.get(symbol)?;
46            state = table.get_state(state_index)?;
47        }
48        Some((state_index, state))
49    }
50
51    /// Get a [`GotoIdx`] `g`, given
52    /// - `state_index`: the value of `goto_data.from_state[g]`.
53    /// - `node`: the value of `goto_data.symbol[g]`.
54    pub(super) fn get_goto(
55        goto_data: &GotoIndicesData<LALR>,
56        state_index: StateIdx<LALR>,
57        node: Node,
58    ) -> Option<GotoIdx> {
59        for ((&s, &index_node), g) in goto_data
60            .from_state
61            .iter()
62            .zip(&goto_data.symbol)
63            .zip(goto_data.get_all_goto_indices())
64        {
65            if s == state_index && index_node == node {
66                return Some(g);
67            }
68        }
69        None
70    }
71
72    /// Quickly declare all non-core items in a table.
73    ///
74    /// This will assert that these are all unique, and that we don't miss any.
75    macro_rules! get_gotos {
76        (in $table:expr;
77            $(
78                $($from_state:ident => $path_symbol:expr)? => $state_name:ident {
79                    $($node:ident: $goto:ident),* $(,)?
80                };
81            )*
82        ) => {
83            use std::collections::HashMap;
84            let (goto_data, _) = $crate::ielr::Phase0::compute_goto_idx_tables(&$table);
85            let mut len = 0;
86            let mut state_indices = HashMap::new();
87            $(
88                let (state_index, state) = get_gotos!(@if_empty $($from_state)? {
89                    $crate::ielr::tests::get_state(&$table, &[]).unwrap()
90                } else {
91                    $(
92                        let state_index = $table
93                            .get_state(state_indices[stringify!($from_state)])
94                            .unwrap()
95                            .transitions[&$path_symbol];
96                        let state = $table.get_state(state_index).unwrap();
97                        (state_index, state)
98                    )?
99                });
100                state_indices.insert(stringify!($state_name), state_index);
101
102                $(
103                    let $goto = if state.uses_precedence {
104                        unreachable!("this macro cannot handle grammars with precedence annotations")
105                    } else {
106                        $crate::ielr::tests::get_goto(&goto_data, state_index, $node).unwrap()
107                    };
108                    len += 1;
109                )*
110                // no missing goto
111                let all_nodes = [$($node),*];
112                assert!(
113                    state
114                        .all_items()
115                        .iter()
116                        .all(|item| item.index > 0 || all_nodes.contains(&item.prod_idx.lhs))
117                );
118            )*
119            // all gotos are unique
120            assert_eq!([$($($goto,)*)*].into_iter().collect::<$crate::structures::Set<$crate::GotoIdx>>().len(), len);
121        };
122        (@if_empty { $($then:tt)* } else { $($otherwise:tt)* }) => {
123            {$($then)*}
124        };
125        (@if_empty $x:tt { $($then:tt)* } else { $($otherwise:tt)* }) => {
126            {$($otherwise)*}
127        };
128    }
129
130    pub(super) use get_gotos;
131
132    use super::phase0::GotoIndicesData;
133
134    /// ```text
135    /// START → T1 N1 N2 N6
136    /// START → T2 N4
137    /// N1 → T2
138    /// N2 → N3
139    /// N3 →
140    /// N3 → T3
141    /// N4 → N5 N3
142    /// N5 → T4
143    /// N6 → T1 T2
144    /// N6 → T4
145    /// ```
146    ///
147    /// The LALR state machine is: (order of states may change)
148    /// ```text
149    /// 0:  START →∙T1 N1 N2 N6
150    ///     START →∙T2 N4
151    /// 1:  START → T1∙N1 N2 N6
152    ///     N1 →∙T2
153    /// 2:  START → T2∙N4
154    ///     N4 →∙N5 N3
155    ///     N5 →∙T4
156    /// 3:  START → T1 N1∙N2 N6
157    ///     N2 →∙N3
158    ///     N3 →∙
159    ///     N3 →∙T3
160    /// 4:  N1 → T2∙
161    /// 5:  START → T2 N4∙
162    /// 6:  N4 → N5∙N3
163    ///     N3 →∙
164    ///     N3 →∙T3
165    /// 7:  N5 → T4∙
166    /// 8:  START → T1 N1 N2∙N6
167    ///     N6 →∙T1 T2
168    ///     N6 →∙T4
169    /// 9:  N2 → N3∙
170    /// 10: N3 → T3∙
171    /// 11: N4 → N5 N3∙
172    /// 12: START → T1 N1 N2 N6∙
173    /// 13: N6 → T1∙T2
174    /// 14: N6 → T4∙
175    /// 15: N6 → T1 T2∙
176    /// ```
177    pub(super) fn grammar_1() -> Grammar {
178        let mut grammar = Grammar::new();
179        for (lhs, rhs) in [
180            (START, vec![T(T1), N(N1), N(N2), N(N6)]),
181            (START, vec![T(T2), N(N4)]),
182            (N1, vec![T(T2)]),
183            (N2, vec![N(N3)]),
184            (N3, vec![]),
185            (N3, vec![T(T3)]),
186            (N4, vec![N(N5), N(N3)]),
187            (N5, vec![T(T4)]),
188            (N6, vec![T(T1), T(T2)]),
189            (N6, vec![T(T4)]),
190        ] {
191            grammar.add_production(lhs, rhs).unwrap();
192        }
193
194        grammar
195    }
196
197    /// ```text
198    /// START → T1 N1 N2 N6
199    /// N1 → N3
200    /// N2 → T2 N4 N5
201    /// N2 → T2 N6
202    /// N3 →
203    /// N3 → N7
204    /// N4 → T1
205    /// N5 →
206    /// N5 → T5
207    /// N6 → N7
208    /// N7 → N8 N3
209    /// N7 → T3
210    /// N8 → T1
211    /// ```
212    ///
213    /// The LALR state machine is: (order of states may change)
214    /// ```text
215    /// 0:  START →∙T1 N1 N2 N6    i0        { EOF }
216    /// 1:  START → T1∙N1 N2 N6              { EOF }
217    ///     N1 →∙N3                i1_1      { T2 }
218    ///     N3 →∙                  i1_2      { T2 }
219    ///     N3 →∙N7                i1_3      { T2 }
220    ///     N7 →∙N8 N3             i1_4      { T2 }
221    ///     N7 →∙T3                i1_5      { T2 }
222    ///     N8 →∙T1                i1_6      { T1, T2, T3 }
223    /// 2:  START → T1 N1∙N2 N6              { EOF }
224    ///     N2 →∙T2 N4 N5          i2_1      { T1, T3 }
225    ///     N2 →∙T2 N6             i2_2      { T1, T3 }
226    /// 3:  START → T1 N1 N2∙N6              { EOF }
227    ///     N6 →∙N7                i3_1      { EOF }
228    ///     N7 →∙N8 N3             i3_2      { EOF }
229    ///     N7 →∙T3                i3_3      { EOF }
230    ///     N8 →∙T1                i3_4      { EOF, T1, T2, T3 }
231    /// 4:  START → T1 N1 N2 N6∙             { EOF }
232    /// 5:  N1 → N3∙                         { T2 }
233    /// 6:  N3 → N7∙                         { EOF, T1, T2, T3 }
234    /// 7:  N7 → N8∙N3                       { EOF, T1, T2, T3 }
235    ///     N3 →∙                  i7_1      { EOF, T1, T2, T3 }
236    ///     N3 →∙N7                i7_2      { EOF, T1, T2, T3 }
237    ///     N7 →∙N8 N3             i7_3      { EOF, T1, T2, T3 }
238    ///     N7 →∙T3                i7_4      { EOF, T1, T2, T3 }
239    ///     N8 →∙T1                i7_5      { EOF, T1, T2, T3 }
240    /// 8:  N7 → T3∙                         { EOF, T1, T2, T3 }
241    /// 9:  N2 → T2∙N4 N5                    { T1, T3 }
242    ///     N2 → T2∙N6                       { T1, T3 }
243    ///     N4 →∙T1                i9_1      { T1, T3, T5 }
244    ///     N6 →∙N7                i9_2      { T1, T3 }
245    ///     N7 →∙N8 N3             i9_3      { T1, T3 }
246    ///     N7 →∙T3                i9_4      { T1, T3 }
247    ///     N8 →∙T1                i9_5      { T1, T3 }
248    /// 10: N2 → T2 N4∙N5                    { T1, T3 }
249    ///     N5 →∙                  i10_1     { T1, T3 }
250    ///     N5 →∙T5                i10_2     { T1, T3 }
251    /// 11: N2 → T2 N4 N5∙                   { T1, T3 }
252    /// 12: N2 → T2 N6∙                      { T1, T3 }
253    /// 13: N6 → N7∙                         { EOF, T1, T3 }
254    /// 14: N8 → T1∙                         { EOF, T1, T2, T3 }
255    /// 15: N7 → N8 N3∙                      { EOF, T1, T2, T3 }
256    /// 16: N4 → T1∙                         { T1, T3, T5 }
257    ///     N8 → T1∙                         { T1, T3 }
258    /// 17: N5 → T5∙                         { T1, T3 }
259    /// 18: END STATE
260    /// ```
261    pub(super) fn grammar_2() -> Grammar {
262        let mut grammar = Grammar::new();
263        for (lhs, rhs) in [
264            (START, vec![T(T1), N(N1), N(N2), N(N6)]),
265            (N1, vec![N(N3)]),
266            (N2, vec![T(T2), N(N4), N(N5)]),
267            (N2, vec![T(T2), N(N6)]),
268            (N3, vec![]),
269            (N3, vec![N(N7)]),
270            (N4, vec![T(T1)]),
271            (N5, vec![]),
272            (N5, vec![T(T5)]),
273            (N6, vec![N(N7)]),
274            (N7, vec![N(N8), N(N3)]),
275            (N7, vec![T(T3)]),
276            (N8, vec![T(T1)]),
277        ] {
278            grammar.add_production(lhs, rhs).unwrap();
279        }
280
281        grammar
282    }
283
284    /// ```text
285    /// START → T1 T2 N1
286    /// START → T1 T2 N2
287    /// N1 → T3
288    /// N2 → T3
289    /// ```
290    ///
291    /// The LALR state machine is: (order of states may change)
292    /// ```text
293    /// 0:  START →∙T1 T2 N1  { EOF }
294    ///     START →∙T1 T2 N2  { EOF }
295    /// 1:  START → T1∙T2 N1  { EOF }
296    ///     START → T1∙T2 N2  { EOF }
297    /// 2:  START → T1 T2∙N1  { EOF }
298    ///     START → T1 T2∙N2  { EOF }
299    ///     N1 →∙T3           { EOF }
300    ///     N2 →∙T3           { EOF }
301    /// 3:  N1 → T3∙          { EOF }
302    ///     N2 → T3∙          { EOF }
303    /// 4:  START → T1 T2 N1∙ { EOF }
304    /// 5:  START → T1 T2 N2∙ { EOF }
305    /// 6:  END STATE
306    /// ```
307    pub(super) fn grammar_3() -> Grammar {
308        let mut grammar = Grammar::new();
309        for (lhs, rhs) in [
310            (START, vec![T(T1), T(T2), N(N1)]),
311            (START, vec![T(T1), T(T2), N(N2)]),
312            (N1, vec![T(T3)]),
313            (N2, vec![T(T3)]),
314        ] {
315            grammar.add_production(lhs, rhs).unwrap();
316        }
317
318        grammar
319    }
320}