spore_vm/parser/
ast.rs

1use compact_str::CompactString;
2use thiserror::Error;
3
4use super::{
5    span::Span,
6    tokenizer::{Token, TokenType},
7};
8
9type Result<T> = std::result::Result<T, AstParseError>;
10
11/// Describes an AST parsing error.
12#[derive(Debug, Error, PartialEq)]
13pub enum AstParseError {
14    #[error("opening parenthesis was unclosed")]
15    UnclosedParen,
16    #[error("found unexpected closing parenthesis")]
17    UnexpectedCloseParen,
18    #[error("string was not properly closed, did you forget \"?")]
19    UnclosedString(Span),
20}
21
22/// Describes a node in the AST.
23#[derive(Debug, PartialEq)]
24pub enum Node {
25    /// A node containing the start and end points of the identifier.
26    Identifier(Span),
27    /// A node containing the void literal.
28    Void(Span),
29    /// A node containing a boolean literal.
30    Bool(Span, bool),
31    /// A node containing an int literal.
32    Int(Span, i64),
33    /// A node containing a float literal.
34    Float(Span, f64),
35    /// A node containing a string literal.
36    String(Span),
37    /// A node containing a sub-tree.
38    Tree(Span, Vec<Node>),
39}
40
41impl Node {
42    /// Parse the contents of `src` into a stream of `Node`.
43    pub fn parse(src: &str) -> impl '_ + Iterator<Item = Result<Self>> {
44        let mut tokens = Token::parse_tokens(src);
45        std::iter::from_fn(move || Node::parse_next(src, &mut tokens))
46    }
47}
48
49impl Node {
50    /// Parse the contents of `src` into a vector of `Node`.
51    ///
52    /// For unit testing only, prefer using `Node::parse`.
53    #[cfg(test)]
54    pub fn parse_to_vec(src: &str) -> Result<Vec<Node>> {
55        Node::parse(src).collect()
56    }
57
58    /// Parse the next Node from `tokenizer`.
59    fn parse_next(src: &str, tokenizer: &mut impl Iterator<Item = Token>) -> Option<Result<Node>> {
60        while let Some(next_token) = tokenizer.next() {
61            match next_token.token_type {
62                TokenType::OpenParen => match Node::parse_until_close(src, tokenizer) {
63                    Ok((end, tree)) => {
64                        let span = next_token.span.extend_end(end);
65                        return Some(Ok(Node::Tree(span, tree)));
66                    }
67                    Err(err) => return Some(Err(err)),
68                },
69                TokenType::CloseParen => return Some(Err(AstParseError::UnexpectedCloseParen)),
70                TokenType::UnterminatedString => {
71                    return Some(Err(AstParseError::UnclosedString(next_token.span)))
72                }
73                TokenType::String | TokenType::Other => {
74                    return Some(Ok(Node::parse_atom(next_token, next_token.as_str(src))))
75                }
76                TokenType::Comment => continue,
77            }
78        }
79        None
80    }
81
82    /// Parse the nodes in `tokenizer` until a closing parenthesis is encountered.
83    ///
84    /// An error is returned if no closing parenthesis is ever encountered.
85    fn parse_until_close(
86        src: &str,
87        tokenizer: &mut impl Iterator<Item = Token>,
88    ) -> Result<(u32, Vec<Node>)> {
89        let mut tree = vec![];
90        while let Some(next_token) = tokenizer.next() {
91            match next_token.token_type {
92                TokenType::OpenParen => match Node::parse_until_close(src, tokenizer) {
93                    Ok((end, t)) => {
94                        let span = next_token.span.extend_end(end);
95                        tree.push(Node::Tree(span, t))
96                    }
97                    err @ Err(_) => return err,
98                },
99                TokenType::CloseParen => return Ok((next_token.span.end, tree)),
100                TokenType::UnterminatedString => {
101                    return Err(AstParseError::UnclosedString(next_token.span))
102                }
103                TokenType::String | TokenType::Other => {
104                    tree.push(Node::parse_atom(next_token, next_token.as_str(src)))
105                }
106                TokenType::Comment => continue,
107            }
108        }
109        Err(AstParseError::UnclosedParen)
110    }
111
112    /// Returns the string literal contained in the node or `None` if `self` is not a
113    /// [Node::String].
114    pub fn to_string_literal(&self, src: &str) -> Option<CompactString> {
115        let contents = match self {
116            Node::String(span) => span.with_src(src).as_str(),
117            _ => return None,
118        };
119        let mut res = CompactString::with_capacity(contents.len().saturating_sub(2));
120        let mut escaped = false;
121        for ch in contents[1..contents.len() - 1].chars() {
122            if escaped {
123                let ch = match ch {
124                    'n' => '\n',
125                    't' => '\t',
126                    ch => ch,
127                };
128                res.push(ch);
129                escaped = false;
130            } else {
131                match ch {
132                    '\\' => escaped = true,
133                    // Not a well formed string.
134                    '"' => return None,
135                    ch => res.push(ch),
136                }
137            }
138        }
139        Some(res)
140    }
141
142    /// Parse `contents` as if it were an atom. Panics if `token_type` does not correspond to an
143    /// atom.
144    fn parse_atom(token: Token, contents: &str) -> Node {
145        match token.token_type {
146            TokenType::OpenParen
147            | TokenType::CloseParen
148            | TokenType::UnterminatedString
149            | TokenType::Comment => {
150                // Unreachable OK: The above scenarios are caught by callers of `parse_atom`.
151                unreachable!()
152            }
153            TokenType::String => Node::String(token.span),
154            TokenType::Other => {
155                let maybe_is_number = contents
156                    .chars()
157                    .next()
158                    .map(|ch| {
159                        if ch.is_ascii_digit() {
160                            return true;
161                        }
162                        contents.len() > 1 && matches!(ch, '-' | '+')
163                    })
164                    .unwrap_or(false);
165                if maybe_is_number {
166                    if let Ok(int) = contents.parse() {
167                        return Node::Int(token.span, int);
168                    } else if let Ok(float) = contents.parse() {
169                        return Node::Float(token.span, float);
170                    }
171                }
172                match contents {
173                    "void" => Node::Void(token.span),
174                    "true" => Node::Bool(token.span, true),
175                    "false" => Node::Bool(token.span, false),
176                    _ => Node::Identifier(token.span),
177                }
178            }
179        }
180    }
181
182    pub fn span(&self) -> Span {
183        match self {
184            Node::Identifier(s) => *s,
185            Node::Void(s) => *s,
186            Node::Bool(s, _) => *s,
187            Node::Int(s, _) => *s,
188            Node::Float(s, _) => *s,
189            Node::String(s) => *s,
190            Node::Tree(s, _) => *s,
191        }
192    }
193}
194
195#[cfg(test)]
196mod tests {
197    use super::*;
198
199    #[test]
200    fn whitespace_produces_no_nodes() {
201        let src = " \t\n ";
202        let actual = Node::parse_to_vec(src).unwrap();
203        assert_eq!(actual, vec![]);
204    }
205
206    #[test]
207    fn atoms_are_parsed() {
208        let src = "1 2.0 three \"four\" true false";
209        let actual = Node::parse_to_vec(src).unwrap();
210        assert_eq!(
211            actual,
212            vec![
213                Node::Int(Span::new(0, 1), 1),
214                Node::Float(Span::new(2, 5), 2.0),
215                Node::Identifier(Span::new(6, 11)),
216                Node::String(Span::new(12, 18)),
217                Node::Bool(Span::new(19, 23), true),
218                Node::Bool(Span::new(24, 29), false),
219            ]
220        );
221    }
222
223    #[test]
224    fn expression_is_parsed_as_tree() {
225        let src = "(+ 1 (- a b) \"number\") (str-len \"str\") \"atom\"";
226        let actual = Node::parse_to_vec(src).unwrap();
227        assert_eq!(
228            actual,
229            vec![
230                Node::Tree(
231                    Span::new(0, 22),
232                    vec![
233                        Node::Identifier(Span::new(1, 2)),
234                        Node::Int(Span::new(3, 4), 1),
235                        Node::Tree(
236                            Span::new(5, 12),
237                            vec![
238                                Node::Identifier(Span::new(6, 7)),
239                                Node::Identifier(Span::new(8, 9)),
240                                Node::Identifier(Span::new(10, 11))
241                            ]
242                        ),
243                        Node::String(Span::new(13, 21)),
244                    ]
245                ),
246                Node::Tree(
247                    Span::new(23, 38),
248                    vec![
249                        Node::Identifier(Span::new(24, 31)),
250                        Node::String(Span::new(32, 37))
251                    ]
252                ),
253                Node::String(Span::new(39, 45)),
254            ]
255        );
256    }
257
258    #[test]
259    fn quoted_strings_within_strings_are_preserved() {
260        let src = "\"this \\\"is\\\" a string\"";
261        let actual = Node::parse_to_vec(src).unwrap();
262        assert_eq!(actual, vec![Node::String(Span::new(0, 22))]);
263    }
264
265    #[test]
266    fn backslash_with_n_produces_newline() {
267        let src = r#""\nn\n""#;
268        let actual = Node::parse_to_vec(src).unwrap();
269        assert_eq!(actual, vec![Node::String(Span::new(0, 7))]);
270        assert_eq!(actual[0].to_string_literal(src).unwrap(), "\nn\n");
271    }
272
273    #[test]
274    fn backslash_with_t_produces_tab() {
275        let src = "\"\\tt\\t\"";
276        let actual = Node::parse_to_vec(src).unwrap();
277        assert_eq!(actual, vec![Node::String(Span::new(0, 7))]);
278        assert_eq!(actual[0].to_string_literal(src).unwrap(), "\tt\t");
279    }
280
281    #[test]
282    fn backslash_with_backslash_produces_backslash() {
283        let src = r#""\\""#;
284        let actual = Node::parse_to_vec(src).unwrap();
285        assert_eq!(actual, vec![Node::String(Span::new(0, 4))]);
286        assert_eq!(actual[0].to_string_literal(src).unwrap(), "\\");
287    }
288
289    #[test]
290    fn unclosed_paren_produces_error() {
291        let src = "(not closed";
292        let actual_err = Node::parse_to_vec(src).unwrap_err();
293        assert_eq!(actual_err, AstParseError::UnclosedParen);
294    }
295
296    #[test]
297    fn unexpected_close_paren_produces_error() {
298        let src = "not closed)";
299        let actual_err = Node::parse_to_vec(src).unwrap_err();
300        assert_eq!(actual_err, AstParseError::UnexpectedCloseParen);
301    }
302
303    #[test]
304    fn unterminated_string_produces_error() {
305        let src = "\"start of string but no end";
306        let actual_err = Node::parse_to_vec(src).unwrap_err();
307        assert_eq!(actual_err, AstParseError::UnclosedString(Span::new(0, 27)));
308    }
309
310    #[test]
311    fn error_in_subexpression_is_returned() {
312        let src = "(((\"unterminated quote)";
313        let actual_err = Node::parse_to_vec(src).unwrap_err();
314        assert_eq!(actual_err, AstParseError::UnclosedString(Span::new(3, 23)));
315    }
316
317    #[test]
318    fn hacks_for_code_coverage() {
319        // There is not much value in testing this so calling function to appease code coverage
320        // tool.
321        AstParseError::UnclosedParen.to_string();
322    }
323}