Skip to main content

grammar_utils/
analysis.rs

1use std::collections::{BTreeMap, BTreeSet};
2
3use super::*;
4
5/// A structure for calculating the set of nullable nonterminals
6/// as well as the FIRST and FOLLOW sets for each nonterminal.
7pub struct GrammarAnalysis<'g> {
8    nullables: BTreeSet<Symbol<'g>>,
9    first_follows: FirstFollows<'g>,
10}
11
12impl<'g> GrammarAnalysis<'g> {
13    /// The constructor for `GrammarAnalysis`.
14    /// This builds the nullable set and the FIRST and FOLLOW sets for `grammar`.
15    pub fn build(grammar: &'g Grammar) -> GrammarAnalysis<'g> {
16        let nullables = Self::calc_nullables(grammar);
17        let first_follows = Self::calc_first_follows(grammar, &nullables);
18
19        GrammarAnalysis {
20            nullables,
21            first_follows,
22        }
23    }
24
25    /// Returns the set of nullable symbols.
26    ///
27    /// The result is a set of nonterminals.
28    /// A `Symbol` is nullable if it can expand into an empty string of terminals
29    /// through the application of some sequence of production rules.
30    pub fn nullables(&self) -> BTreeSet<Symbol<'g>> {
31        self.nullables.clone()
32    }
33
34    /// Returns whether the given symbol is nullable.
35    pub fn is_nullable(&self, symbol: Symbol<'g>) -> bool {
36        self.nullables.contains(&symbol)
37    }
38
39    pub fn first_seq(&self, seq: &[Symbol<'g>]) -> BTreeSet<Symbol<'g>> {
40        let mut result = BTreeSet::new();
41
42        for symbol in seq {
43            if symbol.is_terminal() {
44                result.insert(*symbol);
45                return result;
46            } else {
47                result.extend(self.first(*symbol));
48                if !self.is_nullable(*symbol) {
49                    return result;
50                }
51            }
52        }
53        result
54    }
55
56    pub fn is_nullable_seq(&self, seq: &[Symbol<'g>]) -> bool {
57        seq.iter().copied().all(|symbol| self.is_nullable(symbol))
58    }
59
60    /// Returns the FIRST set for a nonterminal `Symbol`.
61    ///
62    /// The result is a set of terminals.
63    /// A terminal is in the FIRST set of a nonterminal if some sequence of production rules
64    /// starting with that nonterminal expands to a string of terminals starting with that terminal.
65    pub fn first(&self, symbol: Symbol<'g>) -> BTreeSet<Symbol<'g>> {
66        self.first_follows.terminals_from(FFNode::First(symbol))
67    }
68
69    /// Returns the FOLLOW set for a nonterminal `Symbol`.
70    ///
71    /// The result is a set of terminals.
72    /// A terminal is in the FOLLOW set of a nonterminal that terminal could legally follow the
73    /// nonterminal during parsing.
74    pub fn follow(&self, symbol: Symbol<'g>) -> BTreeSet<Symbol<'g>> {
75        self.first_follows.terminals_from(FFNode::Follow(symbol))
76    }
77
78    pub fn can_end_with(&self, start_symbol: Symbol<'g>, symbol: Symbol<'g>) -> bool {
79        self.first_follows.follows_to_follow(start_symbol).contains(&symbol)
80    }
81
82    fn calc_nullables(grammar: &'g Grammar) -> BTreeSet<Symbol<'g>> {
83        let mut nullables = BTreeSet::new();
84
85        // Repeat until an iteration adds nothing new.
86        loop {
87            let mut dirty = false;
88
89            for rule in grammar.rules() {
90                // Skip rules whose LHS is already known to be nullable.
91                if !nullables.contains(&rule.lhs()) {
92                    // If we know (based on what we've seen so far) that all RHS symbols are nullable,
93                    // then the LHS is also nullable.
94                    if rule.rhs().iter().all(|symbol| nullables.contains(symbol)) {
95                        nullables.insert(rule.lhs());
96                        dirty = true;
97                    }
98                }
99            }
100
101            if !dirty {
102                break;
103            }
104        }
105
106        nullables
107    }
108
109    // Calculate the FirstFollows graph of the `Grammar`
110    fn calc_first_follows(grammar: &'g Grammar, nullables: &BTreeSet<Symbol<'g>>) -> FirstFollows<'g> {
111        let mut first_follows = FirstFollows::new();
112
113        for rule in grammar.rules() {
114            // Firsts are Firsts
115            // Note: We iterate the rule's RHS until we hit a non-nullable symbol
116            for symbol in rule.rhs() {
117                if symbol.is_terminal() {
118                    first_follows.link(FFNode::First(rule.lhs()), FFNode::Terminal(symbol));
119                } else {
120                    first_follows.link(FFNode::First(rule.lhs()), FFNode::First(symbol));
121                }
122
123                // if we'ved reached the last nullable nonterminal, break early
124                if !nullables.contains(&symbol) {
125                    break;
126                }
127            }
128
129            // Follows are Firsts, too
130            for (i, symbol) in rule.rhs().iter().copied().enumerate() {
131                // When `symbol`...
132                for j in i+1 .. rule.rhs().len() {
133                    // ... is followed by `follow`...
134                    let follow = rule.rhs()[j];
135
136                    if follow.is_terminal() {
137                        first_follows.link(FFNode::Follow(symbol), FFNode::Terminal(follow));
138                    } else {
139                        first_follows.link(FFNode::Follow(symbol), FFNode::First(follow));
140                    }
141
142                    // if we'ved reached the last nullable nonterminal, break early
143                    if !nullables.contains(&follow) {
144                        break;
145                    }
146                }
147            }
148
149            // Follows can also be follows, though
150            // Note: We iterate the rule's RHS in reverse until we hit a non-nullable symbol
151            for symbol in rule.rhs().into_iter().rev() {
152                if symbol.is_nonterminal() {
153                    first_follows.link(FFNode::Follow(symbol), FFNode::Follow(rule.lhs()));
154                }
155
156                if !nullables.contains(&symbol) {
157                    break;
158                }
159            }
160        }
161
162        first_follows
163    }
164}
165
166// A graph data structure which tracks containment information
167// for FIRST sets, FOLLOW sets, and temrinals
168struct FirstFollows<'g> {
169    // An adjacency list mapping `from_node` to `to_node`.
170    // This indicates that the set represented by `from_node` contains the set represented by `to_node`
171    edges: BTreeMap<FFNode<'g>, BTreeSet<FFNode<'g>>>,
172
173    rev_edges: BTreeMap<FFNode<'g>, BTreeSet<FFNode<'g>>>,
174}
175
176#[derive(Eq, PartialEq, Hash, Clone, Copy, PartialOrd, Ord)]
177enum FFNode<'g> {
178    // The FIRST set of the symbol
179    First(Symbol<'g>),
180    // The FOLLOW set of the symbol
181    Follow(Symbol<'g>),
182    // The singleton set containing just the given symbol
183    Terminal(Symbol<'g>),
184}
185
186impl<'g> FirstFollows<'g> {
187    fn new() -> FirstFollows<'g> {
188        FirstFollows {
189            edges: BTreeMap::new(),
190            rev_edges: BTreeMap::new(),
191        }
192    }
193
194    // Declare that the set represented by `from_node` contains the set represented by `to_node`.
195    // Note: If the `from_node` is not present in the graph already, it allocates a new adjacency list for it.
196    fn link(&mut self, from_node: FFNode<'g>, to_node: FFNode<'g>) {
197        if !self.edges.contains_key(&from_node) {
198            self.edges.insert(from_node, BTreeSet::new());
199        }
200        self.edges.get_mut(&from_node).unwrap().insert(to_node);
201
202        if !self.rev_edges.contains_key(&to_node) {
203            self.rev_edges.insert(to_node, BTreeSet::new());
204        }
205        self.rev_edges.get_mut(&to_node).unwrap().insert(from_node);
206    }
207
208    // Perform a breadth-first search of the graph starting from `from_node`
209    // and return the set of terminals reachable from it.
210    // These are precisely the set represented by the node.
211    fn terminals_from(&self, from_node: FFNode<'g>) -> BTreeSet<Symbol<'g>> {
212        let mut visited = BTreeSet::new();
213        let mut queue = vec![from_node];
214        let mut terminals = BTreeSet::new();
215
216        while let Some(node) = queue.pop() {
217            visited.insert(node);
218            if let FFNode::Terminal(symbol) = node {
219                terminals.insert(symbol);
220            } else if self.edges.contains_key(&node) {
221                for next_node in &self.edges[&node] {
222                    if !visited.contains(next_node) {
223                        queue.push(*next_node);
224                    }
225                }
226            }
227        }
228
229        terminals
230    }
231
232    // Find all nonterminals where `FOLLOW(start_symbol)` can be reached from `FOLLOW(N)`.
233    fn follows_to_follow(&self, start_symbol: Symbol<'g>) -> BTreeSet<Symbol<'g>> {
234        let mut visited = BTreeSet::new();
235        let mut queue = vec![FFNode::Follow(start_symbol)];
236        let mut nonterminals = BTreeSet::new();
237
238        while let Some(node) = queue.pop() {
239            visited.insert(node);
240            if let FFNode::Follow(symbol) = node {
241                nonterminals.insert(symbol);
242            }
243
244            if self.rev_edges.contains_key(&node) {
245                for next_node in &self.rev_edges[&node] {
246                    if !visited.contains(next_node) {
247                        queue.push(*next_node);
248                    }
249                }
250            }
251        }
252
253        nonterminals
254    }
255}