1use std::hash::Hash;
2
3use crate::ast::abstract_syntax_node::AbstractSyntaxNode;
4use crate::ast::abstract_syntax_tree::AbstractSyntaxTree;
5use crate::grammar::context_free_grammar::ContextFreeGrammar;
6use crate::grammar::first_follow_symbols::FirstFollowSymbols;
7use crate::parser::failed_production::FailedProduction;
8use crate::parser::failed_state::FailedState;
9use crate::parser::parse_result::ParseResult;
10use crate::parser::recursive_descent_parser_transitions::RecursiveDescentParserTransitions;
11use crate::token::token::Token;
12
13type ParseProductionResult<TLex, TSyntax> = Result<
14 AbstractSyntaxNode<Token<TLex, TSyntax>>,
15 FailedProduction<TLex, TSyntax>,
16>;
17
18type ParseSymbolResult<'a, TLex, TSyntax> = Result<
19 ParsingState<'a, TLex, TSyntax, std::vec::IntoIter<&'a Vec<TSyntax>>>,
20 FailedState<TLex, TSyntax>,
21>;
22
23struct ParsingState<'a, TLex, TSyntax: 'a, TIter: Iterator<Item=&'a Vec<TSyntax>>> {
24 initial_token_position: usize,
25 final_token_position: usize,
26 prod_iter_option: Option<TIter>,
27 node: AbstractSyntaxNode<Token<TLex, TSyntax>>
28}
29
30impl<'a, TLex, TSyntax, TIter: Iterator<Item=&'a Vec<TSyntax>>> ParsingState<'a, TLex, TSyntax, TIter> {
31 pub fn new(
32 initial_token_position: usize,
33 final_token_position: usize,
34 prod_iter_option: Option<TIter>,
35 node: AbstractSyntaxNode<Token<TLex, TSyntax>>,
36 ) -> Self {
37 ParsingState { initial_token_position, final_token_position, prod_iter_option, node }
38 }
39}
40
41pub struct RecursiveDescentParser<'a, TSyntax> {
42 grammar: &'a ContextFreeGrammar<TSyntax>,
43 transitions: RecursiveDescentParserTransitions<TSyntax>,
44}
45
46impl<'a, TSyntax: Clone + Eq + Hash> RecursiveDescentParser<'a, TSyntax> {
47 pub fn from_grammar(grammar: &'a ContextFreeGrammar<TSyntax>) -> Self {
48 let first_follow_symbols = FirstFollowSymbols::from(grammar);
49
50 Self::from_grammar_and_first_follow_symbols(grammar, &first_follow_symbols)
51 }
52
53 pub fn from_grammar_and_first_follow_symbols(
54 grammar: &'a ContextFreeGrammar<TSyntax>,
55 first_follow_symbols: &FirstFollowSymbols<TSyntax>,
56 ) -> Self {
57 Self {
58 grammar,
59 transitions: RecursiveDescentParserTransitions::from(grammar, first_follow_symbols),
60 }
61 }
62
63 pub fn parse_from_tokens<TLex: Clone + 'a, TIter: Iterator<Item=Token<TLex, TSyntax>>>(
64 &self,
65 tokens_iterator: TIter,
66 ) -> ParseResult<TLex, TSyntax> {
67 let tokens_vector = Self::iterator_to_vec(tokens_iterator);
68
69 if tokens_vector.len() == 0 {
70 panic!("Expecting at least one token!")
71 }
72
73 let symbol_to_derive = self.grammar.get_initial_symbol();
74 let token_position: usize = 0;
75 let first_token = tokens_vector.get(token_position).unwrap();
76
77 let first_token_productions
78 = self.inner_get_token_productions(symbol_to_derive, &first_token.t_type);
79
80 let first_token_productions_iter = first_token_productions.into_iter();
81
82 let state_result = self.inner_parse_from_tokens(
83 symbol_to_derive,
84 &tokens_vector,
85 token_position,
86 first_token_productions_iter,
87 );
88
89 state_result.map(|state| AbstractSyntaxTree::new(state.node))
90 }
91
92 fn build_token_failed_state<TLex: Clone>(
93 production_symbol: &TSyntax,
94 ) -> ParseSymbolResult<'a, TLex, TSyntax> {
95 let failed_state = FailedState::new(vec![], production_symbol.clone());
96 Err(failed_state)
97 }
98
99 fn inner_get_token_productions(
100 &self,
101 symbol_to_derive: &TSyntax,
102 first_token: &TSyntax,
103 ) -> Vec<&Vec<TSyntax>> {
104 self.transitions
105 .get_productions(symbol_to_derive, first_token)
106 .map(
107 |productions| -> Vec<&Vec<TSyntax>> {
108 productions
109 .iter()
110 .map(|production| &production.output).collect()
111 }
112 )
113 .unwrap_or(vec![])
114 }
115
116 fn inner_parse_from_tokens<TLex: Clone>(
117 &self,
118 symbol_to_derive: &TSyntax,
119 tokens: &Vec<Token<TLex, TSyntax>>,
120 tokens_position: usize,
121 mut production_outputs: std::vec::IntoIter<&'a Vec<TSyntax>>,
122 ) -> ParseSymbolResult<'a, TLex, TSyntax> {
123 let mut failed_productions: Vec<FailedProduction<TLex, TSyntax>> = vec![];
124
125 for production_output in &mut production_outputs {
126 let mut current_token_position = tokens_position;
127 let node_option = self.inner_parse_from_tokens_production(
128 symbol_to_derive,
129 tokens,
130 &mut current_token_position,
131 production_output,
132 );
133
134 match node_option {
135 Ok(node) => {
136 return Ok(ParsingState::new(
137 tokens_position,
138 current_token_position,
139 Some(production_outputs),
140 node,
141 ));
142 },
143 Err(failed_production) => {
144 failed_productions.push(failed_production);
145 }
146 }
147 }
148
149 Err(FailedState::new(
150 failed_productions,
151 symbol_to_derive.clone(),
152 ))
153 }
154
155 fn inner_parse_from_tokens_production<TLex: Clone>(
156 &self,
157 symbol_to_derive: &TSyntax,
158 tokens: &Vec<Token<TLex, TSyntax>>,
159 current_token_position: &mut usize,
160 production_output: &'a Vec<TSyntax>,
161 ) -> ParseProductionResult<TLex, TSyntax> {
162 let mut states: Vec<ParsingState<'_, TLex, TSyntax, std::vec::IntoIter<&Vec<TSyntax>>>> = Vec::new();
163
164 while states.len() < production_output.len() {
165 let production_symbol = production_output.get(states.len()).unwrap();
166 let state_option = self.inner_parse_from_tokens_production_symbol(
167 production_symbol,
168 tokens,
169 *current_token_position,
170 );
171
172 match state_option {
173 Ok(state) => {
174 *current_token_position = state.final_token_position;
175 states.push(state);
176 }
177 Err(failed_state) => {
178 match self.inner_parse_pop_states(&mut states, tokens) {
179 Some(production_parsing_states) => {
180 return Self::inner_parse_from_tokens_production_build_failed_state(
181 failed_state,
182 production_output,
183 production_parsing_states,
184 );
185 },
186 None => {
187 *current_token_position = states.get(
188 states.len() - 1
189 ).unwrap().final_token_position;
190 }
191 }
192 }
193 }
194 }
195
196 Self::inner_parse_from_tokens_production_build_node(
197 symbol_to_derive,
198 states,
199 )
200 }
201
202 fn inner_parse_from_tokens_production_build_failed_state<'b, TLex>(
203 failed_state: FailedState<TLex, TSyntax>,
204 production_output: &Vec<TSyntax>,
205 production_parsing_states: Vec<
206 ParsingState<'b, TLex, TSyntax, std::vec::IntoIter<&'b Vec<TSyntax>>>
207 >,
208 ) -> ParseProductionResult<TLex, TSyntax> {
209 let mut pending_symbols: Vec<FailedState<TLex, TSyntax>> = vec![failed_state];
210
211 let child_nodes: Vec<AbstractSyntaxNode<Token<TLex, TSyntax>>> =
212 production_parsing_states.into_iter().map(|state| state.node).collect();
213
214 for i in child_nodes.len() + 1 .. production_output.len() {
215 pending_symbols.push(
216 FailedState::new(
217 vec![],
218 production_output.get(i).unwrap().clone(),
219 ),
220 );
221 }
222
223 let failing_state: FailedProduction<TLex, TSyntax> = FailedProduction::new(
224 child_nodes,
225 pending_symbols,
226 );
227
228 Err(failing_state)
229 }
230
231 fn inner_parse_from_tokens_production_build_node<'b, TLex>(
232 symbol_to_derive: &TSyntax,
233 states: Vec<ParsingState<'b, TLex, TSyntax, std::vec::IntoIter<&'b Vec<TSyntax>>>>
234 ) -> ParseProductionResult<TLex, TSyntax> {
235 let child_nodes: Vec<AbstractSyntaxNode<Token<TLex, TSyntax>>> =
236 states.into_iter().map(|state| state.node).collect();
237
238 let node: AbstractSyntaxNode<Token<TLex, TSyntax>>
239 = AbstractSyntaxNode::new(
240 child_nodes,
241 Token::new(None, symbol_to_derive.clone()),
242 );
243
244 Ok(node)
245 }
246
247 fn inner_parse_from_tokens_production_non_terminal<TLex: Clone>(
248 &self,
249 production_symbol: &TSyntax,
250 tokens: &Vec<Token<TLex, TSyntax>>,
251 token_position: usize,
252 ) -> ParseSymbolResult<TLex, TSyntax> {
253 let current_token_symbol = tokens.get(token_position).unwrap();
254
255 let token_productions = self.inner_get_token_productions(
256 production_symbol,
257 ¤t_token_symbol.t_type,
258 );
259
260 let token_productions_iter = token_productions.into_iter();
261
262 self.inner_parse_from_tokens(
263 production_symbol,
264 tokens,
265 token_position,
266 token_productions_iter,
267 )
268 }
269
270 fn inner_parse_terminal_symbol<TLex: Clone>(
271 &self,
272 production_symbol: &TSyntax,
273 tokens: &Vec<Token<TLex, TSyntax>>,
274 token_position: usize,
275 ) -> ParseSymbolResult<'a, TLex, TSyntax> {
276 if self.grammar.get_epsilon_symbol().eq(production_symbol) {
277 let state: ParsingState<'a, TLex, TSyntax, std::vec::IntoIter<&'a Vec<TSyntax>>>
278 = ParsingState::new(
279 token_position,
280 token_position,
281 None,
282 AbstractSyntaxNode::new(vec![], Token::new(None, production_symbol.clone())),
283 );
284
285 Ok(state)
286 } else {
287 Self::inner_parse_non_epsilon_terminal_symbol(
288 production_symbol,
289 tokens,
290 token_position,
291 )
292 }
293 }
294
295 fn inner_parse_non_epsilon_terminal_symbol<TLex: Clone>(
296 production_symbol: &TSyntax,
297 tokens: &Vec<Token<TLex, TSyntax>>,
298 token_position: usize,
299 ) -> ParseSymbolResult<'a, TLex, TSyntax> {
300 let current_token_symbol_option = tokens.get(token_position);
301
302 match current_token_symbol_option {
303 Some(current_token_symbol) => {
304 if production_symbol.eq(¤t_token_symbol.t_type) {
305 let state: ParsingState<'a, TLex, TSyntax, std::vec::IntoIter<&'a Vec<TSyntax>>>
306 = ParsingState::new(
307 token_position,
308 token_position + 1,
309 None,
310 AbstractSyntaxNode::new(vec![], current_token_symbol.clone())
311 );
312
313 Ok(state)
314 } else {
315 Self::build_token_failed_state(production_symbol)
316 }
317 },
318 None => {
319 Self::build_token_failed_state(production_symbol)
320 }
321 }
322 }
323
324 fn inner_parse_from_tokens_production_symbol<TLex: Clone>(
325 &self,
326 production_symbol: &TSyntax,
327 tokens: &Vec<Token<TLex, TSyntax>>,
328 token_position: usize,
329 ) -> ParseSymbolResult<TLex, TSyntax> {
330 if self.grammar.is_non_terminal(production_symbol) {
331 self.inner_parse_from_tokens_production_non_terminal(
332 production_symbol,
333 tokens,
334 token_position,
335 )
336 } else {
337 self.inner_parse_terminal_symbol(
338 production_symbol,
339 tokens,
340 token_position,
341 )
342 }
343 }
344
345 fn inner_parse_pop_states<TLex: Clone>(
346 &self,
347 states: &mut Vec<ParsingState<'a, TLex, TSyntax, std::vec::IntoIter<&'a Vec<TSyntax>>>>,
348 tokens: &Vec<Token<TLex, TSyntax>>,
349 ) -> Option<Vec<ParsingState<'a, TLex, TSyntax, std::vec::IntoIter<&'a Vec<TSyntax>>>>> {
350 let mut reversed_failed_states: Vec<ParsingState<'a, TLex, TSyntax, std::vec::IntoIter<&'a Vec<TSyntax>>>> = vec![];
351 let mut states_pop_success: bool = false;
352
353 while states.len() > 0 && !states_pop_success {
354 let last_state = states.pop().unwrap();
355
356 if last_state.prod_iter_option.is_some() {
357 let productions_iterator = last_state.prod_iter_option.unwrap();
358
359 let state_option = self.inner_parse_from_tokens(
360 &last_state.node.token.t_type,
361 tokens,
362 last_state.initial_token_position,
363 productions_iterator,
364 );
365
366 match state_option {
367 Ok(state) => {
368 states.push(state);
369 states_pop_success = true;
370 }
371 _ => { }
372 }
373 } else {
374 reversed_failed_states.push(last_state);
375 }
376 }
377
378 if states_pop_success {
379 None
380 } else {
381 reversed_failed_states.reverse();
382 Some(reversed_failed_states)
383 }
384 }
385}
386
387impl<'a, T: Clone> RecursiveDescentParser<'a, T> {
388 fn iterator_to_vec<TElem, TIter: Iterator<Item=TElem>>(iterator: TIter) -> Vec<TElem> {
389 let mut vector: Vec<TElem> = vec![];
390
391 iterator.for_each(|symbol| { vector.push(symbol) });
392
393 vector
394 }
395}