simple_parser_bootstrap/
parser.rs

1use std::collections::BTreeMap as Map;
2use simple_lexer_bootstrap::Token;
3
4type Result<T> = std::result::Result<T, &'static str>;
5
6#[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)]
7pub enum ParseTree<N, T> {
8    Token { token: Token<T> },
9    Nonterminal { nonterminal: N, tokens: Vec<Token<T>>, children: Vec<ParseTree<N, T>> },
10    Ephemeral { tokens: Vec<Token<T>>, children: Vec<ParseTree<N, T>> },
11}
12
13impl<N, T> ParseTree<N, T> {
14    pub fn tokens_len(&self) -> usize {
15        match self {
16            ParseTree::Token { .. } => 1,
17            ParseTree::Nonterminal { tokens, .. } => tokens.len(),
18            ParseTree::Ephemeral { tokens, .. } => tokens.len(),
19        }
20    }
21}
22
23#[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)]
24pub enum Expression<N, T> {
25    TokenKind { token_kind: T },
26    Nonterminal { nonterminal: N },
27    Alternation { expressions: Vec<Expression<N, T>> },
28    Concatenation { expressions: Vec<Expression<N, T>> },
29    Repetition { expression: Box<Expression<N, T>>, min: Option<u32>, max: Option<u32> },
30}
31
32impl<N: Clone + Ord, T: Clone + PartialEq> Expression<N, T> {
33    pub fn parse(&self, tokens: &[Token<T>], productions: &Map<N, Expression<N, T>>) -> Result<ParseTree<N, T>> {
34        match self {
35            Expression::TokenKind { token_kind } => {
36                if let Some(current_token) = tokens.get(0) {
37                    if token_kind == current_token.kind() {
38                        Ok(ParseTree::Token {
39                            token: current_token.clone(),
40                        })
41                    } else { Err("unexpected token") }
42                } else { Err("unexpected eof") }
43            },
44            Expression::Nonterminal { nonterminal } => {
45                if let Some(expression) = productions.get(nonterminal) {
46                    let parse_tree = expression.parse(tokens, productions)?;
47                    match parse_tree {
48                        ParseTree::Token { token } => {
49                            Ok(ParseTree::Nonterminal {
50                                nonterminal: nonterminal.clone(),
51                                tokens: vec![token.clone()],
52                                children: vec![ParseTree::Token { token }],
53                            })
54                        },
55                        ParseTree::Nonterminal { nonterminal: child_nonterminal, tokens, children } => {
56                            Ok(ParseTree::Nonterminal {
57                                nonterminal: nonterminal.clone(),
58                                tokens: tokens.clone(),
59                                children: vec![ParseTree::Nonterminal {
60                                    nonterminal: child_nonterminal,
61                                    tokens,
62                                    children,
63                                }],
64                            })
65                        },
66                        ParseTree::Ephemeral { tokens, children } => {
67                            Ok(ParseTree::Nonterminal {
68                                nonterminal: nonterminal.clone(),
69                                tokens,
70                                children,
71                            })
72                        },
73                    }
74                } else { Err("undefined nonterminal") }
75            },
76            Expression::Alternation { expressions } => {
77                let mut parse_trees = Vec::new();
78                for expression in expressions {
79                    if let Ok(parse_tree) = expression.parse(tokens, productions) {
80                        parse_trees.push(parse_tree);
81                    }
82                }
83                if let Some(mut parse_tree) = parse_trees.pop() {
84                    while let Some(current_parse_tree) = parse_trees.pop() {
85                        if parse_tree.tokens_len() < current_parse_tree.tokens_len() {
86                            parse_tree = current_parse_tree;
87                        }
88                    }
89                    match parse_tree {
90                        ParseTree::Token { token } => {
91                            Ok(ParseTree::Ephemeral {
92                                tokens: vec![token.clone()],
93                                children: vec![ParseTree::Token { token }],
94                            })
95                        },
96                        ParseTree::Nonterminal { nonterminal, tokens, children } => {
97                            Ok(ParseTree::Ephemeral {
98                                tokens: tokens.clone(),
99                                children: vec![ParseTree::Nonterminal {
100                                    nonterminal,
101                                    tokens,
102                                    children,
103                                }],
104                            })
105                        },
106                        ParseTree::Ephemeral { tokens, children } => {
107                            Ok(ParseTree::Ephemeral {
108                                tokens,
109                                children,
110                            })
111                        },
112                    }
113                } else { Err("no match") }
114            },
115            Expression::Concatenation { expressions } => {
116                let mut offset = 0;
117                let mut matched_tokens = Vec::new();
118                let mut children = Vec::new();
119                for expression in expressions {
120                    if let Ok(parse_tree) = expression.parse(&tokens[offset..], productions) {
121                        offset += parse_tree.tokens_len();
122                        match parse_tree {
123                            ParseTree::Token { token } => {
124                                matched_tokens.push(token.clone());
125                                children.push(ParseTree::Token { token });
126                            },
127                            ParseTree::Nonterminal { nonterminal, tokens, children: child_children } => {
128                                matched_tokens.extend(tokens.clone());
129                                children.push(ParseTree::Nonterminal {
130                                    nonterminal,
131                                    tokens,
132                                    children: child_children,
133                                });
134                            },
135                            ParseTree::Ephemeral { tokens, children: child_children } => {
136                                matched_tokens.extend(tokens);
137                                children.extend(child_children);
138                            },
139                        }
140                    } else { return Err("no match"); }
141                }
142                Ok(ParseTree::Ephemeral {
143                    tokens: matched_tokens,
144                    children,
145                })
146            },
147            Expression::Repetition { expression, min, max } => {
148                let mut count = 0;
149                let mut offset = 0;
150                let mut matched_tokens = Vec::new();
151                let mut children = Vec::new();
152                while match max { Some(max) => count < *max, None => true } {
153                    if let Ok(parse_tree) = expression.parse(&tokens[offset..], productions) {
154                        count += 1;
155                        offset += parse_tree.tokens_len();
156                        match parse_tree {
157                            ParseTree::Token { token } => {
158                                matched_tokens.push(token.clone());
159                                children.push(ParseTree::Token { token });
160                            },
161                            ParseTree::Nonterminal { nonterminal, tokens, children: child_children } => {
162                                matched_tokens.extend(tokens.clone());
163                                children.push(ParseTree::Nonterminal {
164                                    nonterminal,
165                                    tokens,
166                                    children: child_children,
167                                });
168                            },
169                            ParseTree::Ephemeral { tokens, children: child_children } => {
170                                matched_tokens.extend(tokens);
171                                children.extend(child_children);
172                            },
173                        }
174                    } else if match min { Some(min) => count >= *min, None => true } {
175                        return Ok(ParseTree::Ephemeral {
176                            tokens: matched_tokens,
177                            children,
178                        });
179                    } else { return Err("partial match"); }
180                }
181                Ok(ParseTree::Ephemeral {
182                    tokens: matched_tokens,
183                    children,
184                })
185            },
186        }
187    }
188}
189
190#[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)]
191pub struct Parser<N, T> {
192    productions: Map<N, Expression<N, T>>,
193    root: N,
194}
195
196impl<N, T> Parser<N, T> {
197    pub fn new(productions: Map<N, Expression<N, T>>, root: N) -> Parser<N, T> {
198        Parser { productions, root }
199    }
200}
201
202impl<N: Clone + Ord, T: Clone + PartialEq> Parser<N, T> {
203    pub fn parse(&self, tokens: &[Token<T>]) -> Result<ParseTree<N, T>> {
204        if let Some(expression) = self.productions.get(&self.root) {
205            let mut parse_tree = expression.parse(tokens, &self.productions)?;
206            if parse_tree.tokens_len() == tokens.len() {
207                parse_tree = match parse_tree {
208                    ParseTree::Token { token } => {
209                        ParseTree::Nonterminal {
210                            nonterminal: self.root.clone(),
211                            tokens: vec![token.clone()],
212                            children: vec![
213                                ParseTree::Token { token }
214                            ]
215                        }
216                    },
217                    ParseTree::Nonterminal { nonterminal, tokens, children } => {
218                        ParseTree::Nonterminal {
219                            nonterminal: self.root.clone(),
220                            tokens: tokens.clone(),
221                            children: vec![
222                                ParseTree::Nonterminal {
223                                    nonterminal,
224                                    tokens,
225                                    children
226                                }
227                            ]
228                        }
229                    },
230                    ParseTree::Ephemeral { tokens, children } => {
231                        ParseTree::Nonterminal {
232                            nonterminal: self.root.clone(),
233                            tokens,
234                            children,
235                        }
236                    },
237                };
238                Ok(parse_tree)
239            } else { Err("partial match") }
240        } else { Err("undefined root nonterminal") }
241    }
242}
243
244#[macro_export]
245macro_rules! tok {
246    ($x:expr) => {{
247        $crate::Expression::TokenKind { token_kind: $x }
248    }}
249}
250
251#[macro_export]
252macro_rules! non {
253    ($x:expr) => {{
254        $crate::Expression::Nonterminal { nonterminal: $x }
255    }}
256}
257
258#[macro_export]
259macro_rules! alt {
260    ($($x:expr),*) => {{
261        #[allow(unused_mut)]
262        let mut temp_vec = Vec::new();
263        $(temp_vec.push($x);)*
264        $crate::Expression::Alternation { expressions: temp_vec }
265    }}
266}
267
268#[macro_export]
269macro_rules! con {
270    ($($x:expr),*) => {{
271        #[allow(unused_mut)]
272        let mut temp_vec = Vec::new();
273        $(temp_vec.push($x);)*
274        $crate::Expression::Concatenation { expressions: temp_vec }
275    }}
276}
277
278#[macro_export]
279macro_rules! rep {
280    ($x:expr, $y:expr, $z:expr) => {{
281        $crate::Expression::Repetition { expression: Box::new($x), min: $y, max: $z }
282    }}
283}
284
285#[macro_export]
286macro_rules! ast { // asterisk
287    ($x:expr) => {{
288        $crate::Expression::Repetition { expression: Box::new($x), min: None, max: None }
289    }}
290}
291
292#[macro_export]
293macro_rules! plu { // plus sign
294    ($x:expr) => {{
295        $crate::Expression::Repetition { expression: Box::new($x), min: Some(1), max: None }
296    }}
297}
298
299#[macro_export]
300macro_rules! que { // question mark
301    ($x:expr) => {{
302        $crate::Expression::Repetition { expression: Box::new($x), min: None, max: Some(1) }
303    }}
304}
305
306#[cfg(test)]
307mod tests {
308    use super::Result;
309    use simple_lexer_bootstrap::Token;
310    use crate::{
311        ParseTree,
312        Parser,
313    };
314
315    #[test]
316    fn test_expression() -> Result<()> {
317        #[allow(non_camel_case_types)]
318        #[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd)]
319        enum TokenKind {
320            PLUS_SIGN,
321            HYPHEN,
322            ASTERISK,
323            SLASH,
324            NUMBER,
325            LEFT_PARENTHESIS,
326            RIGHT_PARENTHESIS,
327        }
328        use TokenKind::*;
329        #[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd)]
330        enum Nonterminal {
331            Addition,
332            Multiplication,
333            Atom,
334            Number,
335        }
336        use Nonterminal::*;
337        let expected = ParseTree::Nonterminal {
338            nonterminal: Addition,
339            tokens: vec![
340                Token::new(NUMBER, "1"),
341                Token::new(SLASH, "/"),
342                Token::new(LEFT_PARENTHESIS, "("),
343                Token::new(NUMBER, "2"),
344                Token::new(PLUS_SIGN, "+"),
345                Token::new(HYPHEN, "-"),
346                Token::new(NUMBER, "3"),
347                Token::new(RIGHT_PARENTHESIS, ")")
348            ],
349            children: vec![
350                ParseTree::Nonterminal {
351                    nonterminal: Multiplication,
352                    tokens: vec![
353                        Token::new(NUMBER, "1"),
354                        Token::new(SLASH, "/"),
355                        Token::new(LEFT_PARENTHESIS, "("),
356                        Token::new(NUMBER, "2"),
357                        Token::new(PLUS_SIGN, "+"),
358                        Token::new(HYPHEN, "-"),
359                        Token::new(NUMBER, "3"),
360                        Token::new(RIGHT_PARENTHESIS, ")")
361                    ],
362                    children: vec![
363                        ParseTree::Nonterminal {
364                            nonterminal: Atom,
365                            tokens: vec![Token::new(NUMBER, "1")],
366                            children: vec![
367                                ParseTree::Nonterminal {
368                                    nonterminal: Number,
369                                    tokens: vec![Token::new(NUMBER, "1")],
370                                    children: vec![
371                                        ParseTree::Token { token: Token::new(NUMBER, "1") }
372                                    ],
373                                },
374                            ],
375                        },
376                        ParseTree::Token { token: Token::new(SLASH, "/") },
377                        ParseTree::Nonterminal {
378                            nonterminal: Atom,
379                            tokens: vec![
380                                Token::new(LEFT_PARENTHESIS, "("),
381                                Token::new(NUMBER, "2"),
382                                Token::new(PLUS_SIGN, "+"),
383                                Token::new(HYPHEN, "-"),
384                                Token::new(NUMBER, "3"),
385                                Token::new(RIGHT_PARENTHESIS, ")")
386                            ],
387                            children: vec![
388                                ParseTree::Token { token: Token::new(LEFT_PARENTHESIS, "(") },
389                                ParseTree::Nonterminal {
390                                    nonterminal: Addition,
391                                    tokens: vec![
392                                        Token::new(NUMBER, "2"),
393                                        Token::new(PLUS_SIGN, "+"),
394                                        Token::new(HYPHEN, "-"),
395                                        Token::new(NUMBER, "3"),
396                                    ],
397                                    children: vec![
398                                        ParseTree::Nonterminal {
399                                            nonterminal: Multiplication,
400                                            tokens: vec![Token::new(NUMBER, "2")],
401                                            children: vec![
402                                                ParseTree::Nonterminal {
403                                                    nonterminal: Atom,
404                                                    tokens: vec![Token::new(NUMBER, "2")],
405                                                    children: vec![
406                                                        ParseTree::Nonterminal {
407                                                            nonterminal: Number,
408                                                            tokens: vec![Token::new(NUMBER, "2")],
409                                                            children: vec![
410                                                                ParseTree::Token { token: Token::new(NUMBER, "2") }
411                                                            ],
412                                                        }
413                                                    ]
414                                                },
415                                            ]
416                                        },
417                                        ParseTree::Token { token: Token::new(PLUS_SIGN, "+") },
418                                        ParseTree::Nonterminal {
419                                            nonterminal: Multiplication,
420                                            tokens: vec![
421                                                Token::new(HYPHEN, "-"),
422                                                Token::new(NUMBER, "3")
423                                            ],
424                                            children: vec![
425                                                ParseTree::Nonterminal {
426                                                    nonterminal: Atom,
427                                                    tokens: vec![
428                                                        Token::new(HYPHEN, "-"),
429                                                        Token::new(NUMBER, "3")
430                                                    ],
431                                                    children: vec![
432                                                        ParseTree::Nonterminal {
433                                                            nonterminal: Number,
434                                                            tokens: vec![
435                                                                Token::new(HYPHEN, "-"),
436                                                                Token::new(NUMBER, "3")
437                                                            ],
438                                                            children: vec![
439                                                                ParseTree::Token { token: Token::new(HYPHEN, "-") },
440                                                                ParseTree::Token { token: Token::new(NUMBER, "3") },
441                                                            ],
442                                                        }
443                                                    ]
444                                                },
445                                            ]
446                                        },
447                                    ]
448                                },
449                                ParseTree::Token { token: Token::new(RIGHT_PARENTHESIS, ")") },
450                            ]
451                        }
452                    ],
453                }
454            ]
455        };
456        let parser = Parser::new(map![
457            Addition => con![non!(Multiplication), ast!(con![alt![tok!(PLUS_SIGN), tok!(HYPHEN)], non!(Multiplication)])],
458            Multiplication => con![non!(Atom), ast!(con![alt![tok!(ASTERISK), tok!(SLASH)], non!(Atom)])],
459            Atom => alt![non!(Number), con![tok!(LEFT_PARENTHESIS), non!(Addition), tok!(RIGHT_PARENTHESIS)]],
460            Number => con![que!(alt![tok!(PLUS_SIGN), tok!(HYPHEN)]), tok!(NUMBER)]
461        ], Addition);
462        // 1 / (2 + -3)
463        let actual = parser.parse(&[
464            Token::new(NUMBER, "1"),
465            Token::new(SLASH, "/"),
466            Token::new(LEFT_PARENTHESIS, "("),
467            Token::new(NUMBER, "2"),
468            Token::new(PLUS_SIGN, "+"),
469            Token::new(HYPHEN, "-"),
470            Token::new(NUMBER, "3"), 
471            Token::new(RIGHT_PARENTHESIS, ")")
472        ])?;
473        assert_eq!(expected, actual);
474        Ok(())
475    }
476}