artlr_syntax/parser/
recursive_descent_parser.rs

1use std::hash::Hash;
2
3use crate::ast::abstract_syntax_node::AbstractSyntaxNode;
4use crate::ast::abstract_syntax_tree::AbstractSyntaxTree;
5use crate::grammar::context_free_grammar::ContextFreeGrammar;
6use crate::grammar::first_follow_symbols::FirstFollowSymbols;
7use crate::parser::failed_production::FailedProduction;
8use crate::parser::failed_state::FailedState;
9use crate::parser::parse_result::ParseResult;
10use crate::parser::recursive_descent_parser_transitions::RecursiveDescentParserTransitions;
11use crate::token::token::Token;
12
13type ParseProductionResult<TLex, TSyntax> = Result<
14    AbstractSyntaxNode<Token<TLex, TSyntax>>,
15    FailedProduction<TLex, TSyntax>,
16>;
17
18type ParseSymbolResult<'a, TLex, TSyntax> = Result<
19    ParsingState<'a, TLex, TSyntax, std::vec::IntoIter<&'a Vec<TSyntax>>>,
20    FailedState<TLex, TSyntax>,
21>;
22
23struct ParsingState<'a, TLex, TSyntax: 'a, TIter: Iterator<Item=&'a Vec<TSyntax>>> {
24    initial_token_position: usize,
25    final_token_position: usize,
26    prod_iter_option: Option<TIter>,
27    node: AbstractSyntaxNode<Token<TLex, TSyntax>>
28}
29
30impl<'a, TLex, TSyntax, TIter: Iterator<Item=&'a Vec<TSyntax>>> ParsingState<'a, TLex, TSyntax, TIter> {
31    pub fn new(
32        initial_token_position: usize,
33        final_token_position: usize,
34        prod_iter_option: Option<TIter>,
35        node: AbstractSyntaxNode<Token<TLex, TSyntax>>,
36    ) -> Self {
37        ParsingState { initial_token_position, final_token_position, prod_iter_option, node }
38    }
39}
40
41pub struct RecursiveDescentParser<'a, TSyntax> {
42    grammar: &'a ContextFreeGrammar<TSyntax>,
43    transitions: RecursiveDescentParserTransitions<TSyntax>,
44}
45
46impl<'a, TSyntax: Clone + Eq + Hash> RecursiveDescentParser<'a, TSyntax> {
47    pub fn from_grammar(grammar: &'a ContextFreeGrammar<TSyntax>) -> Self {
48        let first_follow_symbols = FirstFollowSymbols::from(grammar);
49
50        Self::from_grammar_and_first_follow_symbols(grammar, &first_follow_symbols)
51    }
52
53    pub fn from_grammar_and_first_follow_symbols(
54        grammar: &'a ContextFreeGrammar<TSyntax>,
55        first_follow_symbols: &FirstFollowSymbols<TSyntax>,
56    ) -> Self {
57        Self {
58            grammar,
59            transitions: RecursiveDescentParserTransitions::from(grammar, first_follow_symbols),
60        }
61    }
62
63    pub fn parse_from_tokens<TLex: Clone + 'a, TIter: Iterator<Item=Token<TLex, TSyntax>>>(
64        &self,
65        tokens_iterator: TIter,
66    ) -> ParseResult<TLex, TSyntax> {
67        let tokens_vector = Self::iterator_to_vec(tokens_iterator);
68
69        if tokens_vector.len() == 0 {
70            panic!("Expecting at least one token!")
71        }
72
73        let symbol_to_derive = self.grammar.get_initial_symbol();
74        let token_position: usize = 0;
75        let first_token = tokens_vector.get(token_position).unwrap();
76
77        let first_token_productions
78            = self.inner_get_token_productions(symbol_to_derive, &first_token.t_type);
79
80        let first_token_productions_iter = first_token_productions.into_iter();
81
82        let state_result = self.inner_parse_from_tokens(
83            symbol_to_derive,
84            &tokens_vector,
85            token_position,
86            first_token_productions_iter,
87        );
88
89        state_result.map(|state| AbstractSyntaxTree::new(state.node))
90    }
91
92    fn build_token_failed_state<TLex: Clone>(
93        production_symbol: &TSyntax,
94    ) -> ParseSymbolResult<'a, TLex, TSyntax> {
95        let failed_state = FailedState::new(vec![], production_symbol.clone());
96        Err(failed_state)
97    }
98
99    fn inner_get_token_productions(
100        &self,
101        symbol_to_derive: &TSyntax,
102        first_token: &TSyntax,
103    ) -> Vec<&Vec<TSyntax>> {
104        self.transitions
105            .get_productions(symbol_to_derive, first_token)
106            .map(
107                |productions| -> Vec<&Vec<TSyntax>> {
108                    productions
109                        .iter()
110                        .map(|production| &production.output).collect()
111                }
112            )
113            .unwrap_or(vec![])
114    }
115
116    fn inner_parse_from_tokens<TLex: Clone>(
117        &self,
118        symbol_to_derive: &TSyntax,
119        tokens: &Vec<Token<TLex, TSyntax>>,
120        tokens_position: usize,
121        mut production_outputs: std::vec::IntoIter<&'a Vec<TSyntax>>,
122    ) -> ParseSymbolResult<'a, TLex, TSyntax> {
123        let mut failed_productions: Vec<FailedProduction<TLex, TSyntax>> = vec![];
124
125        for production_output in &mut production_outputs {
126            let mut current_token_position = tokens_position;
127            let node_option = self.inner_parse_from_tokens_production(
128                symbol_to_derive,
129                tokens,
130                &mut current_token_position,
131                production_output,
132            );
133
134            match node_option {
135                Ok(node) => {
136                    return Ok(ParsingState::new(
137                        tokens_position,
138                        current_token_position,
139                        Some(production_outputs),
140                        node,
141                    ));
142                },
143                Err(failed_production) => {
144                    failed_productions.push(failed_production);
145                }
146            }
147        }
148
149        Err(FailedState::new(
150            failed_productions,
151            symbol_to_derive.clone(),
152        ))
153    }
154
155    fn inner_parse_from_tokens_production<TLex: Clone>(
156        &self,
157        symbol_to_derive: &TSyntax,
158        tokens: &Vec<Token<TLex, TSyntax>>,
159        current_token_position: &mut usize,
160        production_output: &'a Vec<TSyntax>,
161    ) -> ParseProductionResult<TLex, TSyntax> {
162        let mut states: Vec<ParsingState<'_, TLex, TSyntax, std::vec::IntoIter<&Vec<TSyntax>>>> = Vec::new();
163
164        while states.len() < production_output.len() {
165            let production_symbol = production_output.get(states.len()).unwrap();
166            let state_option = self.inner_parse_from_tokens_production_symbol(
167                production_symbol,
168                tokens,
169                *current_token_position,
170            );
171
172            match state_option {
173                Ok(state) => {
174                    *current_token_position = state.final_token_position;
175                    states.push(state);
176                }
177                Err(failed_state) => {
178                    match self.inner_parse_pop_states(&mut states, tokens) {
179                        Some(production_parsing_states) => {
180                            return Self::inner_parse_from_tokens_production_build_failed_state(
181                                failed_state,
182                                production_output,
183                                production_parsing_states,
184                            );
185                        },
186                        None => {
187                            *current_token_position = states.get(
188                                states.len() - 1
189                            ).unwrap().final_token_position;
190                        }
191                    }
192                }
193            }
194        }
195
196        Self::inner_parse_from_tokens_production_build_node(
197            symbol_to_derive,
198            states,
199        )
200    }
201
202    fn inner_parse_from_tokens_production_build_failed_state<'b, TLex>(
203        failed_state: FailedState<TLex, TSyntax>,
204        production_output: &Vec<TSyntax>,
205        production_parsing_states: Vec<
206            ParsingState<'b, TLex, TSyntax, std::vec::IntoIter<&'b Vec<TSyntax>>>
207        >,
208    ) -> ParseProductionResult<TLex, TSyntax> {
209        let mut pending_symbols: Vec<FailedState<TLex, TSyntax>> = vec![failed_state];
210
211        let child_nodes: Vec<AbstractSyntaxNode<Token<TLex, TSyntax>>> =
212            production_parsing_states.into_iter().map(|state| state.node).collect();
213
214        for i in child_nodes.len() + 1 .. production_output.len() {
215            pending_symbols.push(
216                FailedState::new(
217                    vec![],
218                    production_output.get(i).unwrap().clone(),
219                ),
220            );
221        }
222        
223        let failing_state: FailedProduction<TLex, TSyntax> = FailedProduction::new(
224            child_nodes,
225            pending_symbols,
226        );
227
228        Err(failing_state)
229    }
230
231    fn inner_parse_from_tokens_production_build_node<'b, TLex>(
232        symbol_to_derive: &TSyntax,
233        states: Vec<ParsingState<'b, TLex, TSyntax, std::vec::IntoIter<&'b Vec<TSyntax>>>>
234    ) -> ParseProductionResult<TLex, TSyntax> {
235        let child_nodes: Vec<AbstractSyntaxNode<Token<TLex, TSyntax>>> =
236            states.into_iter().map(|state| state.node).collect();
237
238        let node: AbstractSyntaxNode<Token<TLex, TSyntax>>
239            = AbstractSyntaxNode::new(
240            child_nodes,
241            Token::new(None, symbol_to_derive.clone()),
242        );
243
244        Ok(node)
245    }
246
247    fn inner_parse_from_tokens_production_non_terminal<TLex: Clone>(
248        &self,
249        production_symbol: &TSyntax,
250        tokens: &Vec<Token<TLex, TSyntax>>,
251        token_position: usize,
252    ) -> ParseSymbolResult<TLex, TSyntax> {
253        let current_token_symbol = tokens.get(token_position).unwrap();
254
255        let token_productions = self.inner_get_token_productions(
256            production_symbol,
257            &current_token_symbol.t_type,
258        );
259
260        let token_productions_iter = token_productions.into_iter();
261
262        self.inner_parse_from_tokens(
263            production_symbol,
264            tokens,
265            token_position,
266            token_productions_iter,
267        )
268    }
269
270    fn inner_parse_terminal_symbol<TLex: Clone>(
271        &self,
272        production_symbol: &TSyntax,
273        tokens: &Vec<Token<TLex, TSyntax>>,
274        token_position: usize,
275    ) -> ParseSymbolResult<'a, TLex, TSyntax> {
276        if self.grammar.get_epsilon_symbol().eq(production_symbol) {
277            let state: ParsingState<'a, TLex, TSyntax, std::vec::IntoIter<&'a Vec<TSyntax>>>
278                = ParsingState::new(
279                token_position,
280                token_position,
281                None,
282                AbstractSyntaxNode::new(vec![], Token::new(None, production_symbol.clone())),
283            );
284
285            Ok(state)
286        } else {
287            Self::inner_parse_non_epsilon_terminal_symbol(
288                production_symbol,
289                tokens,
290                token_position,
291            )
292        }
293    }
294
295    fn inner_parse_non_epsilon_terminal_symbol<TLex: Clone>(
296        production_symbol: &TSyntax,
297        tokens: &Vec<Token<TLex, TSyntax>>,
298        token_position: usize,
299    ) -> ParseSymbolResult<'a, TLex, TSyntax> {
300        let current_token_symbol_option = tokens.get(token_position);
301
302        match current_token_symbol_option {
303            Some(current_token_symbol) => {
304                if production_symbol.eq(&current_token_symbol.t_type) {
305                    let state: ParsingState<'a, TLex, TSyntax, std::vec::IntoIter<&'a Vec<TSyntax>>>
306                        = ParsingState::new(
307                        token_position,
308                        token_position + 1,
309                        None,
310                        AbstractSyntaxNode::new(vec![], current_token_symbol.clone())
311                    );
312    
313                    Ok(state)
314                } else {
315                    Self::build_token_failed_state(production_symbol)
316                }
317            },
318            None => {
319                Self::build_token_failed_state(production_symbol)
320            }
321        }
322    }
323
324    fn inner_parse_from_tokens_production_symbol<TLex: Clone>(
325        &self,
326        production_symbol: &TSyntax,
327        tokens: &Vec<Token<TLex, TSyntax>>,
328        token_position: usize,
329    ) -> ParseSymbolResult<TLex, TSyntax> {
330        if self.grammar.is_non_terminal(production_symbol) {
331            self.inner_parse_from_tokens_production_non_terminal(
332                production_symbol,
333                tokens,
334                token_position,
335            )
336        } else {
337            self.inner_parse_terminal_symbol(
338                production_symbol,
339                tokens,
340                token_position,
341            )
342        }
343    }
344
345    fn inner_parse_pop_states<TLex: Clone>(
346        &self,
347        states: &mut Vec<ParsingState<'a, TLex, TSyntax, std::vec::IntoIter<&'a Vec<TSyntax>>>>,
348        tokens: &Vec<Token<TLex, TSyntax>>,
349    ) -> Option<Vec<ParsingState<'a, TLex, TSyntax, std::vec::IntoIter<&'a Vec<TSyntax>>>>> {
350        let mut reversed_failed_states: Vec<ParsingState<'a, TLex, TSyntax, std::vec::IntoIter<&'a Vec<TSyntax>>>> = vec![];
351        let mut states_pop_success: bool = false;
352
353        while states.len() > 0 && !states_pop_success {
354            let last_state = states.pop().unwrap();
355
356            if last_state.prod_iter_option.is_some() {
357                let productions_iterator = last_state.prod_iter_option.unwrap();
358
359                let state_option = self.inner_parse_from_tokens(
360                    &last_state.node.token.t_type,
361                    tokens,
362                    last_state.initial_token_position,
363                    productions_iterator,
364                );
365
366                match state_option {
367                    Ok(state) => {
368                        states.push(state);
369                        states_pop_success = true;
370                    }
371                    _ => { }
372                }
373            } else {
374                reversed_failed_states.push(last_state);
375            }
376        }
377
378        if states_pop_success {
379            None
380        } else {
381            reversed_failed_states.reverse();
382            Some(reversed_failed_states)
383        }
384    }
385}
386
387impl<'a, T: Clone> RecursiveDescentParser<'a, T> {
388    fn iterator_to_vec<TElem, TIter: Iterator<Item=TElem>>(iterator: TIter) -> Vec<TElem> {
389        let mut vector: Vec<TElem> = vec![];
390
391        iterator.for_each(|symbol| { vector.push(symbol) });
392
393        vector
394    }
395}