Skip to main content

grammar_utils/lr1/
state.rs

1use crate::*;
2use super::*;
3
4/// An LR(1) state.
5/// Consists of an LR(1) itemset.
6#[derive(Clone)]
7pub struct State<'g> {
8    grammar: &'g Grammar,
9    items: Vec<Item<'g>>,
10}
11
12// TODO contents should be private
13/// The index of a given state.
14#[derive(Debug)]
15#[derive(Clone, Copy, Eq, PartialEq, Hash, Ord, PartialOrd)]
16pub struct StateIndex(pub usize);
17
18impl From<StateIndex> for usize {
19    fn from(value: StateIndex) -> Self {
20        value.0
21    }
22}
23
24impl<'g> std::fmt::Debug for State<'g> {
25    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
26        for item in self.items() {
27            writeln!(f, "{item:?}")?;
28        }
29        Ok(())
30    }
31}
32
33impl<'g> PartialEq for State<'g> {
34    fn eq(&self, other: &Self) -> bool {
35        std::ptr::eq(self.grammar, other.grammar) && self.items == other.items
36    }
37}
38
39impl<'g> Eq for State<'g> {}
40
41impl<'g> State<'g> {
42    /// Get the underlying `Grammar` for this state.
43    pub fn grammar(&self) -> &'g Grammar {
44        self.grammar
45    }
46
47    /// Get the itemset for this state.
48    pub fn items(&self) -> &[Item<'g>] {
49        self.items.as_slice()
50    }
51
52    /// Generates the state representing the closure of a single item.
53    pub(crate) fn singleton(item: Item<'g>, analysis: &GrammarAnalysis<'g>) -> Self {
54        let grammar: &'g Grammar = item.grammar();
55        let itemset = State {
56            grammar,
57            items: vec![item],
58        };
59        itemset.closure(analysis)
60    }
61
62    /// Calculate the ε-closure of the items in this state.
63    ///
64    /// This captures the fact that when the item is ready to accept a nonterminal,
65    /// it is equivalently ready to begin parsing that nonterminal
66    /// using of the rules in the grammar.
67    pub(crate) fn closure(&self, analysis: &GrammarAnalysis<'g>) -> State<'g> {
68        let mut itemset = self.items.clone();
69
70        // Iterate repeatedly until no new items are found.
71        // (That is, until `dirty` stays `false`.
72        loop {
73            let mut dirty = false;
74            // We're about to iterate over itemset, so we create a buffer
75            // to avoid iterator invalidation.
76            let mut new_items = vec![];
77
78            for item in &itemset {
79                // If we're not at the end of the rule...
80                if let Some(next_symbol) = item.next_symbol() {
81                    // Calculate the look ahead.
82                    //
83                    // This step is done for LR(1) but not for LR(0).
84                    //
85                    // The lookahead is based on the symbol *following* the symbol at the cursor.
86                    // If it's a nonterminal, we take its FIRST set as the lookahead.
87                    // If it's a terminal, that becomes the (only) lookahead symbol.
88                    //
89                    // In the case the cursor points to the last symbol in the rule,
90                    // we keep the same lookahead.
91                    let lookahead = if let Some(next_next_symbol) = item.next_next_symbol() {
92                        if next_next_symbol.is_nonterminal() {
93                            analysis.first(next_next_symbol).into_iter().map(|symbol| Some(symbol)).collect()
94                        } else {
95                            [Some(next_next_symbol)].into_iter().collect()
96                        }
97                    } else {
98                        item.lookahead().clone()
99                    };
100
101                    // If the cursor points at a nonterminal,
102                    // find all of the rules for that nonterminal and add them
103                    // (if they aren't already in the itemset).
104                    //
105                    // Adding this rule may enable even more items in the next iteration.
106                    // Set `dirty` to `true` to indicate we need to iterate again.
107                    if next_symbol.is_nonterminal() {
108                        let symbol_rules = self.grammar
109                            .rules()
110                            .into_iter()
111                            .filter(|rule| {
112                                rule.lhs() == next_symbol
113                            });
114
115                        for rule in symbol_rules {
116                            let item = Item::new(rule, 0, lookahead.clone());
117                            if !itemset.contains(&item) {
118                                new_items.push(item);
119                                dirty = true;
120                            }
121                        }
122                    }
123                }
124
125            }
126
127            // Now that we're done iterating, we can safely copy the items into the itemset.
128            for item in new_items {
129                if !itemset.contains(&item) {
130                    itemset.push(item);
131                }
132            }
133
134            // And if we iterated without changing anything,
135            // then we're done.
136            if !dirty {
137                break;
138            }
139        }
140
141        State {
142            grammar: self.grammar,
143            items: itemset,
144        }
145    }
146
147    // Take the current state and calculate which state is reached
148    // when it shifts `symbol` onto the stack.
149    //
150    // This has the effect of going through each item and advancing the cursor
151    // for any item where the symbol at the cursor is `symbol`.
152    // For any item where the symbol at the cursor is something else,
153    // or in the case that the cursor is at the end,
154    // the item is instead discarded.
155    //
156    // The result is then the ε-closure of the resulting itemset.
157    pub fn follow(&self, analysis: &GrammarAnalysis<'g>, symbol: Symbol<'g>) -> State<'g> {
158        let mut items = vec![];
159        for item in &self.items {
160            if let Some(next_symbol) = item.next_symbol() {
161                if next_symbol == symbol {
162                    items.push(item.step().unwrap());
163                }
164            }
165        }
166
167        let itemset = State {
168            grammar: self.grammar,
169            items,
170        };
171
172        itemset.closure(analysis)
173    }
174}