1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
use std::collections::{HashMap, HashSet};
use super::*;
/// A structure for calculating the set of nullable nonterminals
/// as well as the FIRST and FOLLOW sets for each nonterminal.
pub struct GrammarAnalysis<'g> {
nullables: HashSet<Symbol<'g>>,
first_follows: FirstFollows<'g>,
}
impl<'g> GrammarAnalysis<'g> {
/// The constructor for `GrammarAnalysis`.
/// This builds the nullable set and the FIRST and FOLLOW sets for `grammar`.
pub fn build(grammar: &'g Grammar) -> GrammarAnalysis<'g> {
let nullables = Self::calc_nullables(grammar);
let first_follows = Self::calc_first_follows(grammar, &nullables);
GrammarAnalysis {
nullables,
first_follows,
}
}
/// Returns the set of nullable symbols.
///
/// The result is a set of nonterminals.
/// A `Symbol` is nullable if it can expand into an empty string of terminals
/// through the application of some sequence of production rules.
pub fn nullables(&self) -> HashSet<Symbol<'g>> {
self.nullables.clone()
}
/// Returns whether the given symbol is nullable.
pub fn is_nullable(&self, symbol: Symbol<'g>) -> bool {
self.nullables.contains(&symbol)
}
pub fn first_seq(&self, seq: &[Symbol<'g>]) -> HashSet<Symbol<'g>> {
let mut result = HashSet::new();
for symbol in seq {
if symbol.is_terminal() {
result.insert(*symbol);
return result;
} else {
result.extend(self.first(*symbol));
if !self.is_nullable(*symbol) {
return result;
}
}
}
result
}
pub fn is_nullable_seq(&self, seq: &[Symbol<'g>]) -> bool {
seq.iter().copied().all(|symbol| self.is_nullable(symbol))
}
/// Returns the FIRST set for a nonterminal `Symbol`.
///
/// The result is a set of terminals.
/// A terminal is in the FIRST set of a nonterminal if some sequence of production rules
/// starting with that nonterminal expands to a string of terminals starting with that terminal.
pub fn first(&self, symbol: Symbol<'g>) -> HashSet<Symbol<'g>> {
self.first_follows.terminals_from(FFNode::First(symbol))
}
/// Returns the FOLLOW set for a nonterminal `Symbol`.
///
/// The result is a set of terminals.
/// A terminal is in the FOLLOW set of a nonterminal that terminal could legally follow the
/// nonterminal during parsing.
pub fn follow(&self, symbol: Symbol<'g>) -> HashSet<Symbol<'g>> {
self.first_follows.terminals_from(FFNode::Follow(symbol))
}
pub fn can_end_with(&self, start_symbol: Symbol<'g>, symbol: Symbol<'g>) -> bool {
self.first_follows.follows_to_follow(start_symbol).contains(&symbol)
}
fn calc_nullables(grammar: &'g Grammar) -> HashSet<Symbol<'g>> {
let mut nullables = HashSet::new();
// Repeat until an iteration adds nothing new.
loop {
let mut dirty = false;
for rule in grammar.rules() {
// Skip rules whose LHS is already known to be nullable.
if !nullables.contains(&rule.lhs()) {
// If we know (based on what we've seen so far) that all RHS symbols are nullable,
// then the LHS is also nullable.
if rule.rhs().iter().all(|symbol| nullables.contains(symbol)) {
nullables.insert(rule.lhs());
dirty = true;
}
}
}
if !dirty {
break;
}
}
nullables
}
// Calculate the FirstFollows graph of the `Grammar`
fn calc_first_follows(grammar: &'g Grammar, nullables: &HashSet<Symbol<'g>>) -> FirstFollows<'g> {
let mut first_follows = FirstFollows::new();
for rule in grammar.rules() {
// Firsts are Firsts
// Note: We iterate the rule's RHS until we hit a non-nullable symbol
for symbol in rule.rhs() {
if symbol.is_terminal() {
first_follows.link(FFNode::First(rule.lhs()), FFNode::Terminal(symbol));
} else {
first_follows.link(FFNode::First(rule.lhs()), FFNode::First(symbol));
}
// if we'ved reached the last nullable nonterminal, break early
if !nullables.contains(&symbol) {
break;
}
}
// Follows are Firsts, too
for (i, symbol) in rule.rhs().iter().copied().enumerate() {
// When `symbol`...
for j in i+1 .. rule.rhs().len() {
// ... is followed by `follow`...
let follow = rule.rhs()[j];
if follow.is_terminal() {
first_follows.link(FFNode::Follow(symbol), FFNode::Terminal(follow));
} else {
first_follows.link(FFNode::Follow(symbol), FFNode::First(follow));
}
// if we'ved reached the last nullable nonterminal, break early
if !nullables.contains(&follow) {
break;
}
}
}
// Follows can also be follows, though
// Note: We iterate the rule's RHS in reverse until we hit a non-nullable symbol
for symbol in rule.rhs().into_iter().rev() {
if symbol.is_nonterminal() {
first_follows.link(FFNode::Follow(symbol), FFNode::Follow(rule.lhs()));
}
if !nullables.contains(&symbol) {
break;
}
}
}
first_follows
}
}
// A graph data structure which tracks containment information
// for FIRST sets, FOLLOW sets, and temrinals
struct FirstFollows<'g> {
// An adjacency list mapping `from_node` to `to_node`.
// This indicates that the set represented by `from_node` contains the set represented by `to_node`
edges: HashMap<FFNode<'g>, HashSet<FFNode<'g>>>,
rev_edges: HashMap<FFNode<'g>, HashSet<FFNode<'g>>>,
}
#[derive(Eq, PartialEq, Hash, Clone, Copy)]
enum FFNode<'g> {
// The FIRST set of the symbol
First(Symbol<'g>),
// The FOLLOW set of the symbol
Follow(Symbol<'g>),
// The singleton set containing just the given symbol
Terminal(Symbol<'g>),
}
impl<'g> FirstFollows<'g> {
fn new() -> FirstFollows<'g> {
FirstFollows {
edges: HashMap::new(),
rev_edges: HashMap::new(),
}
}
// Declare that the set represented by `from_node` contains the set represented by `to_node`.
// Note: If the `from_node` is not present in the graph already, it allocates a new adjacency list for it.
fn link(&mut self, from_node: FFNode<'g>, to_node: FFNode<'g>) {
if !self.edges.contains_key(&from_node) {
self.edges.insert(from_node, HashSet::new());
}
self.edges.get_mut(&from_node).unwrap().insert(to_node);
if !self.rev_edges.contains_key(&to_node) {
self.rev_edges.insert(to_node, HashSet::new());
}
self.rev_edges.get_mut(&to_node).unwrap().insert(from_node);
}
// Perform a breadth-first search of the graph starting from `from_node`
// and return the set of terminals reachable from it.
// These are precisely the set represented by the node.
fn terminals_from(&self, from_node: FFNode<'g>) -> HashSet<Symbol<'g>> {
let mut visited = HashSet::new();
let mut queue = vec![from_node];
let mut terminals = HashSet::new();
while let Some(node) = queue.pop() {
visited.insert(node);
if let FFNode::Terminal(symbol) = node {
terminals.insert(symbol);
} else if self.edges.contains_key(&node) {
for next_node in &self.edges[&node] {
if !visited.contains(next_node) {
queue.push(*next_node);
}
}
}
}
terminals
}
// Find all nonterminals where `FOLLOW(start_symbol)` can be reached from `FOLLOW(N)`.
fn follows_to_follow(&self, start_symbol: Symbol<'g>) -> HashSet<Symbol<'g>> {
let mut visited = HashSet::new();
let mut queue = vec![FFNode::Follow(start_symbol)];
let mut nonterminals = HashSet::new();
while let Some(node) = queue.pop() {
visited.insert(node);
if let FFNode::Follow(symbol) = node {
nonterminals.insert(symbol);
}
if self.rev_edges.contains_key(&node) {
for next_node in &self.rev_edges[&node] {
if !visited.contains(next_node) {
queue.push(*next_node);
}
}
}
}
nonterminals
}
}