1use crate::cell::Cell;
2use crate::char::named_to_char;
3use crate::lex::TokenType::NumberPrefix;
4use crate::lex::{Token, TokenType};
5use crate::number::{Exactness, Number};
6use crate::parse::Error::{
7 ExpectedListTerminator, ExpectedVectorTerminator, Incomplete, UnexpectedToken, UnknownChar,
8};
9use crate::{lex, list};
10use std::iter::Peekable;
11
12#[derive(thiserror::Error, Debug, Eq, PartialEq)]
13pub enum Error {
14 #[error("incomplete")]
15 Incomplete,
16 #[error("unexpected token '{0}'")]
17 UnexpectedToken(String),
18 #[error("unexpected one token after .")]
19 ExpectedOneTokenAfterDot,
20 #[error("expected at least one token before .")]
21 ExpectedTokenBeforeDot,
22 #[error("expected list terminator {0}, but encountered {1}")]
23 ExpectedListTerminator(char, char),
24 #[error("expected vector terminator ), but encountered {0}")]
25 ExpectedVectorTerminator(char),
26 #[error("syntax error: {0}")]
27 SyntaxError(String),
28 #[error("unknown character {0}")]
29 UnknownChar(String),
30 #[error(transparent)]
31 LexError(#[from] lex::Error),
32}
33
34pub fn parse_text(text: &str) -> Result<(Cell, Option<&str>), Error> {
42 let tokens = lex::scan(text)?;
43 let mut cur = tokens.iter().peekable();
44 let cell = parse(text, &mut cur)?;
45
46 let remaining_text = cur.peek().map(|Token { span, .. }| &text[span.0..]);
47
48 Ok((cell, remaining_text))
49}
50
51pub fn parse<'a, T: Iterator<Item = &'a Token>>(
58 text: &str,
59 cur: &mut Peekable<T>,
60) -> Result<Cell, Error> {
61 let token = match cur.next() {
62 Some(token) => token,
63 None => return Err(Error::Incomplete),
64 };
65 match token.token_type {
66 TokenType::SingleQuote => Ok(list!["quote", parse(text, cur)?]),
67 TokenType::Quasiquote => Ok(list!["quasiquote", parse(text, cur)?]),
68 TokenType::Unquote => Ok(list!["unquote", parse(text, cur)?]),
69 TokenType::RightParen => Err(Error::UnexpectedToken(")".into())),
70 TokenType::LeftParen => parse_list(text, cur, token),
71 TokenType::HashParen => parse_vector(text, cur),
72 TokenType::True => Ok(Cell::Bool(true)),
73 TokenType::False => Ok(Cell::Bool(false)),
74 TokenType::Char => parse_char(text, token),
75 TokenType::String => parse_string(match token.span(text) {
76 "\"\"" => "",
77 span => &span[1..span.len() - 1],
78 }),
79 TokenType::Symbol => Ok(Cell::new_symbol(token.span(text))),
80 TokenType::NumberPrefix | TokenType::Number => parse_number(text, cur, token),
81 TokenType::Dot | TokenType::WhiteSpace => {
82 Err(Error::UnexpectedToken(token.span(text).into()))
83 }
84 }
85}
86
87fn parse_list<'a, T: Iterator<Item = &'a Token>>(
102 text: &str,
103 cur: &mut Peekable<T>,
104 start_token: &Token,
105) -> Result<Cell, Error> {
106 let mut list = vec![];
107 loop {
108 match cur.peek().ok_or(Error::Incomplete)?.token_type {
109 TokenType::RightParen => {
110 let start_token = start_token.span(text).chars().next().unwrap();
111 let end_token = cur.next().unwrap().span(text).chars().next().unwrap();
112 if !match start_token {
113 '(' => end_token == ')',
114 '[' => end_token == ']',
115 '{' => end_token == '}',
116 _ => false,
117 } {
118 return Err(ExpectedListTerminator(start_token, end_token));
119 }
120 return Ok(Cell::new_list(list));
121 }
122 TokenType::Dot => {
123 cur.next();
124 return parse_improper_list_tail(list, text, cur);
125 }
126 _ => {
127 list.push(parse(text, cur)?);
128 }
129 }
130 }
131}
132
133fn parse_improper_list_tail<'a, T: Iterator<Item = &'a Token>>(
144 list: Vec<Cell>,
145 text: &str,
146 cur: &mut Peekable<T>,
147) -> Result<Cell, Error> {
148 if list.is_empty() {
150 return Err(Error::ExpectedTokenBeforeDot);
151 }
152
153 let last_cdr = match cur.peek().ok_or(Error::Incomplete)?.token_type {
155 TokenType::Dot | TokenType::RightParen => Err(Error::ExpectedOneTokenAfterDot),
156 _ => Ok(parse(text, cur)?),
157 }?;
158
159 match cur.next().ok_or(Error::Incomplete)?.token_type {
161 TokenType::RightParen => Ok(Cell::new_improper_list(list, last_cdr)),
162 _ => Err(Error::ExpectedOneTokenAfterDot),
163 }
164}
165
166fn parse_vector<'a, T: Iterator<Item = &'a Token>>(
181 text: &str,
182 cur: &mut Peekable<T>,
183) -> Result<Cell, Error> {
184 let mut vector = vec![];
185 loop {
186 match cur.peek().ok_or(Error::Incomplete)?.token_type {
187 TokenType::RightParen => {
188 let end_token = cur.next().unwrap().span(text).chars().next().unwrap();
189 if end_token != ')' {
190 return Err(ExpectedVectorTerminator(end_token));
191 }
192 return Ok(Cell::Vector(vector));
193 }
194 TokenType::Dot => {
195 return Err(UnexpectedToken(".".into()));
196 }
197 _ => {
198 vector.push(parse(text, cur)?);
199 }
200 }
201 }
202}
203
204fn parse_char(text: &str, token: &Token) -> Result<Cell, Error> {
212 let span = token.span(text);
213 let span = &span[2..span.len()];
214 if span.chars().count() == 1 {
215 Ok(Cell::Char(span.chars().next().unwrap()))
216 } else if span.starts_with('x') && span[1..span.len()].chars().all(|it| it.is_ascii_hexdigit())
217 {
218 let c =
219 u32::from_str_radix(&span[1..span.len()], 16).map_err(|_| UnknownChar(span.into()))?;
220 let c = char::from_u32(c).ok_or_else(|| UnknownChar(span.into()))?;
221 Ok(Cell::Char(c))
222 } else {
223 named_to_char(span)
224 .map(Cell::Char)
225 .ok_or_else(|| Error::UnknownChar(span.into()))
226 }
227}
228
229pub fn parse_string(span: &str) -> Result<Cell, Error> {
233 let mut cur = span.chars().peekable();
234 let mut output = String::new();
235 while let Some(c) = cur.next() {
236 if c == '\\' {
237 output.push(match cur.peek() {
238 Some('\\') => '\\',
239 Some('a') => char::from_u32(0x7).unwrap(),
240 Some('b') => char::from_u32(0x8).unwrap(),
241 Some('e') => char::from_u32(0x1b).unwrap(),
242 Some('t') => '\t',
243 Some('n') => '\n',
244 Some('r') => '\r',
245 Some('v') => char::from_u32(0xb).unwrap(),
246 Some('f') => char::from_u32(0xc).unwrap(),
247 Some('x') => {
248 cur.next();
249 let mut unicode_value = 0_u32;
250 loop {
251 match cur.peek() {
252 Some(';') => break,
253 Some(c) if c.is_ascii_hexdigit() => {
254 unicode_value = unicode_value.checked_mul(16).ok_or_else(|| {
255 Error::SyntaxError("overflow when parsing \\x".into())
256 })?;
257 unicode_value = unicode_value
258 .checked_add(c.to_digit(16).unwrap())
259 .ok_or_else(|| {
260 Error::SyntaxError("overflow when parsing \\x".into())
261 })?;
262 }
263 Some(c) => {
264 return Err(Error::SyntaxError(format!(
265 "\\x expected hex digits but encountered '{}' in \"{}\"",
266 c, span
267 )))
268 }
269 None => {
270 return Err(Error::SyntaxError(format!(
271 "\\x must be terminated with ; in \"{}\"",
272 span
273 )))
274 }
275 }
276 cur.next();
277 }
278 char::from_u32(unicode_value).ok_or_else(|| {
279 Error::SyntaxError(format!("\\#x{:x}; is not valid unicode", unicode_value))
280 })?
281 }
282 Some(c) => *c,
283 None => return Err(Error::Incomplete),
284 });
285 cur.next();
286 } else {
287 output.push(c);
288 }
289 }
290
291 Ok(Cell::String(output))
292}
293
294fn parse_number<'a, T: Iterator<Item = &'a Token>>(
307 text: &str,
308 cur: &mut Peekable<T>,
309 mut token: &'a Token,
310) -> Result<Cell, Error> {
311 let mut exactness = Exactness::Unspecified;
312 let mut radix = 10;
313
314 while token.token_type == NumberPrefix {
315 match token.span(text) {
316 "#e" => exactness = Exactness::Exact,
317 "#i" => exactness = Exactness::Inexact,
318 "#d" => radix = 10,
319 "#b" => radix = 2,
320 "#o" => radix = 8,
321 "#x" => radix = 16,
322 _ => panic!("unexpected number prefix {}", token.span(text)),
323 }
324 token = cur.next().ok_or(Incomplete)?;
325 }
326
327 let span = token.span(text);
328 match Number::parse_with_exactness(span, exactness, radix) {
329 Some(num) => Ok(Cell::Number(num)),
330 None => Ok(Cell::Symbol(span.to_string())),
331 }
332}
333
334#[macro_export]
345macro_rules! parse {
346 ($lhs:expr) => {{
347 let tokens = lex::scan($lhs).expect("lex failed");
348 let mut cur = tokens.iter().peekable();
349 parse::parse($lhs, &mut cur).expect("parse failed")
350 }};
351}
352
353#[cfg(test)]
354mod tests {
355 use super::*;
356 use crate::cell;
357 use crate::cons;
358 use crate::lex;
359 use crate::list;
360 use crate::vector;
361
362 macro_rules! parses {
363 ($($lhs:expr => $rhs:expr),+) => {{
364 $(
365 assert_eq!(parse($lhs, &mut lex::scan($lhs).unwrap().iter().peekable()), Ok($rhs));
366 )+
367 }};
368 }
369
370 macro_rules! fails {
371 ($($lhs:expr),+) => {{
372 $(
373 assert!(matches!(parse($lhs, &mut lex::scan($lhs).unwrap().iter().peekable()), Err(_)));
374 )+
375 }};
376 }
377
378 #[test]
379 fn paren_mismatch() {
380 fails!["(", ")"];
381 }
382
383 #[test]
384 fn quote_sugar() {
385 parses! {
386 "'1" => list!["quote", 1],
387 "'(1 2)" => list!["quote", list![1, 2]]
388 };
389 }
390
391 #[test]
392 fn variables() {
393 parses! {
394 "foo" => cell!["foo"],
395 "bar" => cell!["bar"]
396 };
397 }
398
399 #[test]
400 fn consumes_one_expression_per_call() {
401 let text = "foo bar baz";
402 let tokens = lex::scan(text).unwrap();
403 let mut cur = (&tokens).iter().peekable();
404 assert_eq!(parse(text, &mut cur), Ok(cell!["foo"]));
405 assert_eq!(parse(text, &mut cur), Ok(cell!["bar"]));
406 assert_eq!(parse(text, &mut cur), Ok(cell!["baz"]));
407 assert_eq!(parse(text, &mut cur), Err(Error::Incomplete));
408 }
409
410 #[test]
411 fn lists_are_fully_consumed() {
412 let text = "(foo bar)";
413 let tokens = lex::scan(text).unwrap();
414 let mut cur = (&tokens).iter().peekable();
415 assert_eq!(parse(text, &mut cur), Ok(list!["foo", "bar"]));
416 assert_eq!(parse(text, &mut cur), Err(Error::Incomplete));
417 }
418
419 #[test]
420 fn procedures() {
421 parses! {
422 "(foo)" => list!["foo"],
423 "( foo )" => list!["foo"],
424 "(foo bar baz)" => list!["foo", "bar", "baz"],
425 "()" => cell![],
426 "( )" => cell![]
427 };
428 }
429
430 #[test]
431 fn quasiquote() {
432 parses! {
433 "`(1 2 3)" => list!["quasiquote", list![1,2,3]],
434 ",(1 2 3)" => list!["unquote", list![1,2,3]]
435 };
436 }
437
438 #[test]
439 fn dotted_form() {
440 parses! {
441 "(0 . 2)" => cons![0, 2],
442 "(0 1 . 2)" => cons![0, cons![1, 2]],
443 "(1 2 . (3 4))" => list![1, 2, 3, 4],
444 "((1 . 2) . 3)" => cons![cons![1, 2], 3],
445 "((().()).())" => cons![cons![cell![], cell![]], cell![]]
446 };
447 fails!["(.)", "(. 0)", "(0 .)", "(0 .)", "(0 1 .)", "(1 . 2 . 3)"];
448 }
449
450 #[test]
451 fn vectors() {
452 parses! {
453 "#()" => vector![],
454 "#(1)" => vector![1],
455 "#(1 2 3)" => vector![1,2,3],
456 "#(#(1 2 3) #(4 5 6))" => vector![vector![1,2,3], vector![4,5,6]],
457 "#('foo 'bar)" => vector![list!["quote", "foo"], list!["quote", "bar"]]
458 };
459
460 fails!["#(1 2 3}", "#(1 2 3]", "#(1 2 . 3)"];
461 }
462
463 #[test]
464 fn numbers() {
465 parses! {
466 "42" => cell![42],
467 "+42" => cell![42],
468 "-42" => cell![-42],
469 "42..1" => cell!["42..1"]
470 };
471 parses! {
472 "#xff" => cell![255],
473 "#b11111111" => cell![255],
474 "#o377" => cell![255]
475 };
476 parses! {
477 "4/2" => cell![2],
478 "1.0" => cell![1]
479 }
480
481 parses! {
482 "#x7fffffff/1" => cell![0x7fffffff],
483 "#xffffffff/1" => cell![0xffffffff]
484 }
485 }
486
487 #[test]
488 fn characters() {
489 parses! {
490 "#\\c" => cell!['c'],
491 "#\\space" => cell![' '],
492 "#\\newline" => cell!['\n'],
493 "#\\x03bb" => cell!['λ'],
494 "#\\x03BB" => cell!['λ'],
495 "#\\x3bb" => cell!['λ']
496 }
497 }
498
499 #[test]
500 fn strings() {
501 parses! {
502 r#""""# => Cell::new_string(""),
503 r#""foo""# => Cell::new_string("foo"),
504 r#""foo \"bar\" baz""# => Cell::new_string("foo \"bar\" baz"),
505 r#""foo \\ baz""# => Cell::new_string("foo \\ baz"),
506 r#""\t""# => Cell::new_string("\t"),
507 r#""\n""# => Cell::new_string("\n"),
508 r#""\a""# => Cell::new_string(&String::from(char::from_u32(0x7).unwrap())),
509 r#""\a""# => Cell::new_string(&String::from(char::from_u32(0x7).unwrap())),
510 r#""\b""# => Cell::new_string(&String::from(char::from_u32(0x8).unwrap())),
511 r#""\e""# => Cell::new_string(&String::from(char::from_u32(0x1b).unwrap())),
512 r#""\x41;""# => Cell::new_string("A"),
513 r#""\x1f436;""# => Cell::new_string("🐶")
514 };
515
516 fails![r#""\xffffffff;""#, r#""\x41""#, r#""\xzz;""#];
517 }
518
519 #[test]
520 fn expressions() {
521 parses! {
522 "foo" => cell!["foo"],
523 "42" => cell![42],
524 "-18" => cell![-18],
525 "(foo)" => list!["foo"],
526 "(foo (bar baz))" => list!["foo", list!["bar", "baz"]]
527 };
528 }
529
530 #[test]
531 fn alt_paren_chars() {
532 parses! {
533 "[1 2 3]" => list![1, 2, 3],
534 "{1 2 3}" => list![1, 2, 3]
535 };
536
537 fails!["'(1 2 3]", "'(4 5 6}", "'{1 2 3]", "'[1 2 3}"];
538 }
539}