1use compact_str::CompactString;
2use thiserror::Error;
3
4use super::{
5 span::Span,
6 tokenizer::{Token, TokenType},
7};
8
9type Result<T> = std::result::Result<T, AstParseError>;
10
11#[derive(Debug, Error, PartialEq)]
13pub enum AstParseError {
14 #[error("opening parenthesis was unclosed")]
15 UnclosedParen,
16 #[error("found unexpected closing parenthesis")]
17 UnexpectedCloseParen,
18 #[error("string was not properly closed, did you forget \"?")]
19 UnclosedString(Span),
20}
21
22#[derive(Debug, PartialEq)]
24pub enum Node {
25 Identifier(Span),
27 Void(Span),
29 Bool(Span, bool),
31 Int(Span, i64),
33 Float(Span, f64),
35 String(Span),
37 Tree(Span, Vec<Node>),
39}
40
41impl Node {
42 pub fn parse(src: &str) -> impl '_ + Iterator<Item = Result<Self>> {
44 let mut tokens = Token::parse_tokens(src);
45 std::iter::from_fn(move || Node::parse_next(src, &mut tokens))
46 }
47}
48
49impl Node {
50 #[cfg(test)]
54 pub fn parse_to_vec(src: &str) -> Result<Vec<Node>> {
55 Node::parse(src).collect()
56 }
57
58 fn parse_next(src: &str, tokenizer: &mut impl Iterator<Item = Token>) -> Option<Result<Node>> {
60 while let Some(next_token) = tokenizer.next() {
61 match next_token.token_type {
62 TokenType::OpenParen => match Node::parse_until_close(src, tokenizer) {
63 Ok((end, tree)) => {
64 let span = next_token.span.extend_end(end);
65 return Some(Ok(Node::Tree(span, tree)));
66 }
67 Err(err) => return Some(Err(err)),
68 },
69 TokenType::CloseParen => return Some(Err(AstParseError::UnexpectedCloseParen)),
70 TokenType::UnterminatedString => {
71 return Some(Err(AstParseError::UnclosedString(next_token.span)))
72 }
73 TokenType::String | TokenType::Other => {
74 return Some(Ok(Node::parse_atom(next_token, next_token.as_str(src))))
75 }
76 TokenType::Comment => continue,
77 }
78 }
79 None
80 }
81
82 fn parse_until_close(
86 src: &str,
87 tokenizer: &mut impl Iterator<Item = Token>,
88 ) -> Result<(u32, Vec<Node>)> {
89 let mut tree = vec![];
90 while let Some(next_token) = tokenizer.next() {
91 match next_token.token_type {
92 TokenType::OpenParen => match Node::parse_until_close(src, tokenizer) {
93 Ok((end, t)) => {
94 let span = next_token.span.extend_end(end);
95 tree.push(Node::Tree(span, t))
96 }
97 err @ Err(_) => return err,
98 },
99 TokenType::CloseParen => return Ok((next_token.span.end, tree)),
100 TokenType::UnterminatedString => {
101 return Err(AstParseError::UnclosedString(next_token.span))
102 }
103 TokenType::String | TokenType::Other => {
104 tree.push(Node::parse_atom(next_token, next_token.as_str(src)))
105 }
106 TokenType::Comment => continue,
107 }
108 }
109 Err(AstParseError::UnclosedParen)
110 }
111
112 pub fn to_string_literal(&self, src: &str) -> Option<CompactString> {
115 let contents = match self {
116 Node::String(span) => span.with_src(src).as_str(),
117 _ => return None,
118 };
119 let mut res = CompactString::with_capacity(contents.len().saturating_sub(2));
120 let mut escaped = false;
121 for ch in contents[1..contents.len() - 1].chars() {
122 if escaped {
123 let ch = match ch {
124 'n' => '\n',
125 't' => '\t',
126 ch => ch,
127 };
128 res.push(ch);
129 escaped = false;
130 } else {
131 match ch {
132 '\\' => escaped = true,
133 '"' => return None,
135 ch => res.push(ch),
136 }
137 }
138 }
139 Some(res)
140 }
141
142 fn parse_atom(token: Token, contents: &str) -> Node {
145 match token.token_type {
146 TokenType::OpenParen
147 | TokenType::CloseParen
148 | TokenType::UnterminatedString
149 | TokenType::Comment => {
150 unreachable!()
152 }
153 TokenType::String => Node::String(token.span),
154 TokenType::Other => {
155 let maybe_is_number = contents
156 .chars()
157 .next()
158 .map(|ch| {
159 if ch.is_ascii_digit() {
160 return true;
161 }
162 contents.len() > 1 && matches!(ch, '-' | '+')
163 })
164 .unwrap_or(false);
165 if maybe_is_number {
166 if let Ok(int) = contents.parse() {
167 return Node::Int(token.span, int);
168 } else if let Ok(float) = contents.parse() {
169 return Node::Float(token.span, float);
170 }
171 }
172 match contents {
173 "void" => Node::Void(token.span),
174 "true" => Node::Bool(token.span, true),
175 "false" => Node::Bool(token.span, false),
176 _ => Node::Identifier(token.span),
177 }
178 }
179 }
180 }
181
182 pub fn span(&self) -> Span {
183 match self {
184 Node::Identifier(s) => *s,
185 Node::Void(s) => *s,
186 Node::Bool(s, _) => *s,
187 Node::Int(s, _) => *s,
188 Node::Float(s, _) => *s,
189 Node::String(s) => *s,
190 Node::Tree(s, _) => *s,
191 }
192 }
193}
194
195#[cfg(test)]
196mod tests {
197 use super::*;
198
199 #[test]
200 fn whitespace_produces_no_nodes() {
201 let src = " \t\n ";
202 let actual = Node::parse_to_vec(src).unwrap();
203 assert_eq!(actual, vec![]);
204 }
205
206 #[test]
207 fn atoms_are_parsed() {
208 let src = "1 2.0 three \"four\" true false";
209 let actual = Node::parse_to_vec(src).unwrap();
210 assert_eq!(
211 actual,
212 vec![
213 Node::Int(Span::new(0, 1), 1),
214 Node::Float(Span::new(2, 5), 2.0),
215 Node::Identifier(Span::new(6, 11)),
216 Node::String(Span::new(12, 18)),
217 Node::Bool(Span::new(19, 23), true),
218 Node::Bool(Span::new(24, 29), false),
219 ]
220 );
221 }
222
223 #[test]
224 fn expression_is_parsed_as_tree() {
225 let src = "(+ 1 (- a b) \"number\") (str-len \"str\") \"atom\"";
226 let actual = Node::parse_to_vec(src).unwrap();
227 assert_eq!(
228 actual,
229 vec![
230 Node::Tree(
231 Span::new(0, 22),
232 vec![
233 Node::Identifier(Span::new(1, 2)),
234 Node::Int(Span::new(3, 4), 1),
235 Node::Tree(
236 Span::new(5, 12),
237 vec![
238 Node::Identifier(Span::new(6, 7)),
239 Node::Identifier(Span::new(8, 9)),
240 Node::Identifier(Span::new(10, 11))
241 ]
242 ),
243 Node::String(Span::new(13, 21)),
244 ]
245 ),
246 Node::Tree(
247 Span::new(23, 38),
248 vec![
249 Node::Identifier(Span::new(24, 31)),
250 Node::String(Span::new(32, 37))
251 ]
252 ),
253 Node::String(Span::new(39, 45)),
254 ]
255 );
256 }
257
258 #[test]
259 fn quoted_strings_within_strings_are_preserved() {
260 let src = "\"this \\\"is\\\" a string\"";
261 let actual = Node::parse_to_vec(src).unwrap();
262 assert_eq!(actual, vec![Node::String(Span::new(0, 22))]);
263 }
264
265 #[test]
266 fn backslash_with_n_produces_newline() {
267 let src = r#""\nn\n""#;
268 let actual = Node::parse_to_vec(src).unwrap();
269 assert_eq!(actual, vec![Node::String(Span::new(0, 7))]);
270 assert_eq!(actual[0].to_string_literal(src).unwrap(), "\nn\n");
271 }
272
273 #[test]
274 fn backslash_with_t_produces_tab() {
275 let src = "\"\\tt\\t\"";
276 let actual = Node::parse_to_vec(src).unwrap();
277 assert_eq!(actual, vec![Node::String(Span::new(0, 7))]);
278 assert_eq!(actual[0].to_string_literal(src).unwrap(), "\tt\t");
279 }
280
281 #[test]
282 fn backslash_with_backslash_produces_backslash() {
283 let src = r#""\\""#;
284 let actual = Node::parse_to_vec(src).unwrap();
285 assert_eq!(actual, vec![Node::String(Span::new(0, 4))]);
286 assert_eq!(actual[0].to_string_literal(src).unwrap(), "\\");
287 }
288
289 #[test]
290 fn unclosed_paren_produces_error() {
291 let src = "(not closed";
292 let actual_err = Node::parse_to_vec(src).unwrap_err();
293 assert_eq!(actual_err, AstParseError::UnclosedParen);
294 }
295
296 #[test]
297 fn unexpected_close_paren_produces_error() {
298 let src = "not closed)";
299 let actual_err = Node::parse_to_vec(src).unwrap_err();
300 assert_eq!(actual_err, AstParseError::UnexpectedCloseParen);
301 }
302
303 #[test]
304 fn unterminated_string_produces_error() {
305 let src = "\"start of string but no end";
306 let actual_err = Node::parse_to_vec(src).unwrap_err();
307 assert_eq!(actual_err, AstParseError::UnclosedString(Span::new(0, 27)));
308 }
309
310 #[test]
311 fn error_in_subexpression_is_returned() {
312 let src = "(((\"unterminated quote)";
313 let actual_err = Node::parse_to_vec(src).unwrap_err();
314 assert_eq!(actual_err, AstParseError::UnclosedString(Span::new(3, 23)));
315 }
316
317 #[test]
318 fn hacks_for_code_coverage() {
319 AstParseError::UnclosedParen.to_string();
322 }
323}