grammar_utils/
analysis.rs1use std::collections::{HashMap, HashSet};
2
3use super::*;
4
5pub struct GrammarAnalysis<'g> {
8 nullables: HashSet<Symbol<'g>>,
9 first_follows: FirstFollows<'g>,
10}
11
12impl<'g> GrammarAnalysis<'g> {
13 pub fn build(grammar: &'g Grammar) -> GrammarAnalysis<'g> {
16 let nullables = Self::calc_nullables(grammar);
17 let first_follows = Self::calc_first_follows(grammar, &nullables);
18
19 GrammarAnalysis {
20 nullables,
21 first_follows,
22 }
23 }
24
25 pub fn nullables(&self) -> HashSet<Symbol<'g>> {
31 self.nullables.clone()
32 }
33
34 pub fn is_nullable(&self, symbol: Symbol<'g>) -> bool {
36 self.nullables.contains(&symbol)
37 }
38
39 pub fn first_seq(&self, seq: &[Symbol<'g>]) -> HashSet<Symbol<'g>> {
40 let mut result = HashSet::new();
41
42 for symbol in seq {
43 if symbol.is_terminal() {
44 result.insert(*symbol);
45 return result;
46 } else {
47 result.extend(self.first(*symbol));
48 if !self.is_nullable(*symbol) {
49 return result;
50 }
51 }
52 }
53 result
54 }
55
56 pub fn is_nullable_seq(&self, seq: &[Symbol<'g>]) -> bool {
57 seq.iter().copied().all(|symbol| self.is_nullable(symbol))
58 }
59
60 pub fn first(&self, symbol: Symbol<'g>) -> HashSet<Symbol<'g>> {
66 self.first_follows.terminals_from(FFNode::First(symbol))
67 }
68
69 pub fn follow(&self, symbol: Symbol<'g>) -> HashSet<Symbol<'g>> {
75 self.first_follows.terminals_from(FFNode::Follow(symbol))
76 }
77
78 pub fn can_end_with(&self, start_symbol: Symbol<'g>, symbol: Symbol<'g>) -> bool {
79 self.first_follows.follows_to_follow(start_symbol).contains(&symbol)
80 }
81
82 fn calc_nullables(grammar: &'g Grammar) -> HashSet<Symbol<'g>> {
83 let mut nullables = HashSet::new();
84
85 loop {
87 let mut dirty = false;
88
89 for rule in grammar.rules() {
90 if !nullables.contains(&rule.lhs()) {
92 if rule.rhs().iter().all(|symbol| nullables.contains(symbol)) {
95 nullables.insert(rule.lhs());
96 dirty = true;
97 }
98 }
99 }
100
101 if !dirty {
102 break;
103 }
104 }
105
106 nullables
107 }
108
109 fn calc_first_follows(grammar: &'g Grammar, nullables: &HashSet<Symbol<'g>>) -> FirstFollows<'g> {
111 let mut first_follows = FirstFollows::new();
112
113 for rule in grammar.rules() {
114 for symbol in rule.rhs() {
117 if symbol.is_terminal() {
118 first_follows.link(FFNode::First(rule.lhs()), FFNode::Terminal(symbol));
119 } else {
120 first_follows.link(FFNode::First(rule.lhs()), FFNode::First(symbol));
121 }
122
123 if !nullables.contains(&symbol) {
125 break;
126 }
127 }
128
129 for (i, symbol) in rule.rhs().iter().copied().enumerate() {
131 for j in i+1 .. rule.rhs().len() {
133 let follow = rule.rhs()[j];
135
136 if follow.is_terminal() {
137 first_follows.link(FFNode::Follow(symbol), FFNode::Terminal(follow));
138 } else {
139 first_follows.link(FFNode::Follow(symbol), FFNode::First(follow));
140 }
141
142 if !nullables.contains(&follow) {
144 break;
145 }
146 }
147 }
148
149 for symbol in rule.rhs().into_iter().rev() {
152 if symbol.is_nonterminal() {
153 first_follows.link(FFNode::Follow(symbol), FFNode::Follow(rule.lhs()));
154 }
155
156 if !nullables.contains(&symbol) {
157 break;
158 }
159 }
160 }
161
162 first_follows
163 }
164}
165
166struct FirstFollows<'g> {
169 edges: HashMap<FFNode<'g>, HashSet<FFNode<'g>>>,
172
173 rev_edges: HashMap<FFNode<'g>, HashSet<FFNode<'g>>>,
174}
175
176#[derive(Eq, PartialEq, Hash, Clone, Copy)]
177enum FFNode<'g> {
178 First(Symbol<'g>),
180 Follow(Symbol<'g>),
182 Terminal(Symbol<'g>),
184}
185
186impl<'g> FirstFollows<'g> {
187 fn new() -> FirstFollows<'g> {
188 FirstFollows {
189 edges: HashMap::new(),
190 rev_edges: HashMap::new(),
191 }
192 }
193
194 fn link(&mut self, from_node: FFNode<'g>, to_node: FFNode<'g>) {
197 if !self.edges.contains_key(&from_node) {
198 self.edges.insert(from_node, HashSet::new());
199 }
200 self.edges.get_mut(&from_node).unwrap().insert(to_node);
201
202 if !self.rev_edges.contains_key(&to_node) {
203 self.rev_edges.insert(to_node, HashSet::new());
204 }
205 self.rev_edges.get_mut(&to_node).unwrap().insert(from_node);
206 }
207
208 fn terminals_from(&self, from_node: FFNode<'g>) -> HashSet<Symbol<'g>> {
212 let mut visited = HashSet::new();
213 let mut queue = vec![from_node];
214 let mut terminals = HashSet::new();
215
216 while let Some(node) = queue.pop() {
217 visited.insert(node);
218 if let FFNode::Terminal(symbol) = node {
219 terminals.insert(symbol);
220 } else if self.edges.contains_key(&node) {
221 for next_node in &self.edges[&node] {
222 if !visited.contains(next_node) {
223 queue.push(*next_node);
224 }
225 }
226 }
227 }
228
229 terminals
230 }
231
232 fn follows_to_follow(&self, start_symbol: Symbol<'g>) -> HashSet<Symbol<'g>> {
234 let mut visited = HashSet::new();
235 let mut queue = vec![FFNode::Follow(start_symbol)];
236 let mut nonterminals = HashSet::new();
237
238 while let Some(node) = queue.pop() {
239 visited.insert(node);
240 if let FFNode::Follow(symbol) = node {
241 nonterminals.insert(symbol);
242 }
243
244 if self.rev_edges.contains_key(&node) {
245 for next_node in &self.rev_edges[&node] {
246 if !visited.contains(next_node) {
247 queue.push(*next_node);
248 }
249 }
250 }
251 }
252
253 nonterminals
254 }
255}