1use std::collections::BTreeMap;
2use std::rc::Rc;
3
4use sema_core::{SemaError, Span, SpanMap, Value};
5
6use crate::lexer::{tokenize, SpannedToken, Token};
7
/// Recursive-descent parser over a pre-tokenized input.
struct Parser {
    /// The complete token stream being parsed.
    tokens: Vec<SpannedToken>,
    /// Index into `tokens` of the next token to be consumed.
    pos: usize,
    /// Source spans recorded for parsed lists and vectors, keyed by the
    /// address of the `Rc` allocation that backs the resulting value.
    span_map: SpanMap,
}
13
14impl Parser {
15 fn new(tokens: Vec<SpannedToken>) -> Self {
16 Parser {
17 tokens,
18 pos: 0,
19 span_map: SpanMap::new(),
20 }
21 }
22
23 fn peek(&self) -> Option<&Token> {
24 self.tokens.get(self.pos).map(|t| &t.token)
25 }
26
27 fn span(&self) -> Span {
28 self.tokens
29 .get(self.pos)
30 .map(|t| t.span)
31 .unwrap_or(Span::point(0, 0))
32 }
33
34 fn advance(&mut self) -> Option<&SpannedToken> {
35 let tok = self.tokens.get(self.pos);
36 if tok.is_some() {
37 self.pos += 1;
38 }
39 tok
40 }
41
42 fn expect(&mut self, expected: &Token) -> Result<(), SemaError> {
43 let span = self.span();
44 match self.advance() {
45 Some(t) if &t.token == expected => Ok(()),
46 Some(t) => Err(SemaError::Reader {
47 message: format!(
48 "expected `{}`, got `{}`",
49 token_display(expected),
50 token_display(&t.token)
51 ),
52 span,
53 }),
54 None => Err(SemaError::Reader {
55 message: format!("expected `{}`, got end of input", token_display(expected)),
56 span,
57 }),
58 }
59 }
60
61 fn parse_expr(&mut self) -> Result<Value, SemaError> {
62 let span = self.span();
63 match self.peek() {
64 None => Err(SemaError::Reader {
65 message: "unexpected end of input".to_string(),
66 span,
67 }),
68 Some(Token::LParen) => self.parse_list(),
69 Some(Token::LBracket) => self.parse_vector(),
70 Some(Token::LBrace) => self.parse_map(),
71 Some(Token::Quote) => {
72 self.advance();
73 let inner = self.parse_expr().map_err(|_| {
74 SemaError::Reader {
75 message: "quote (') requires an expression after it".to_string(),
76 span,
77 }
78 .with_hint("e.g. '(1 2 3) or 'foo")
79 })?;
80 self.make_list_with_span(vec![Value::symbol("quote"), inner], span)
81 }
82 Some(Token::Quasiquote) => {
83 self.advance();
84 let inner = self.parse_expr().map_err(|_| {
85 SemaError::Reader {
86 message: "quasiquote (`) requires an expression after it".to_string(),
87 span,
88 }
89 .with_hint("e.g. `(list ,x)")
90 })?;
91 self.make_list_with_span(vec![Value::symbol("quasiquote"), inner], span)
92 }
93 Some(Token::Unquote) => {
94 self.advance();
95 let inner = self.parse_expr().map_err(|_| {
96 SemaError::Reader {
97 message: "unquote (,) requires an expression after it".to_string(),
98 span,
99 }
100 .with_hint("use inside quasiquote, e.g. `(list ,x)")
101 })?;
102 self.make_list_with_span(vec![Value::symbol("unquote"), inner], span)
103 }
104 Some(Token::UnquoteSplice) => {
105 self.advance();
106 let inner = self.parse_expr().map_err(|_| {
107 SemaError::Reader {
108 message: "unquote-splicing (,@) requires an expression after it"
109 .to_string(),
110 span,
111 }
112 .with_hint("use inside quasiquote, e.g. `(list ,@xs)")
113 })?;
114 self.make_list_with_span(vec![Value::symbol("unquote-splicing"), inner], span)
115 }
116 Some(Token::BytevectorStart) => self.parse_bytevector(),
117 Some(_) => self.parse_atom(),
118 }
119 }
120
121 fn make_list_with_span(&mut self, items: Vec<Value>, span: Span) -> Result<Value, SemaError> {
122 let rc = Rc::new(items);
123 let ptr = Rc::as_ptr(&rc) as usize;
124 self.span_map.insert(ptr, span);
125 Ok(Value::list_from_rc(rc))
126 }
127
128 fn prev_span(&self) -> Span {
130 if self.pos > 0 {
131 self.tokens[self.pos - 1].span
132 } else {
133 Span::point(0, 0)
134 }
135 }
136
137 fn parse_list(&mut self) -> Result<Value, SemaError> {
138 let open_span = self.span();
139 self.expect(&Token::LParen)?;
140 let mut items = Vec::new();
141 while self.peek() != Some(&Token::RParen) {
142 if self.peek().is_none() {
143 return Err(SemaError::Reader {
144 message: "unterminated list".to_string(),
145 span: open_span,
146 }
147 .with_hint("add a closing `)`"));
148 }
149 if self.peek() == Some(&Token::Dot) {
151 self.advance(); let cdr = self.parse_expr()?;
153 self.expect(&Token::RParen)?;
154 let close = self.prev_span();
155 items.push(Value::symbol("."));
156 items.push(cdr);
157 return self.make_list_with_span(items, open_span.to(&close));
158 }
159 items.push(self.parse_expr()?);
160 }
161 self.expect(&Token::RParen)?;
162 let close = self.prev_span();
163 self.make_list_with_span(items, open_span.to(&close))
164 }
165
166 fn parse_vector(&mut self) -> Result<Value, SemaError> {
167 let open_span = self.span();
168 self.expect(&Token::LBracket)?;
169 let mut items = Vec::new();
170 while self.peek() != Some(&Token::RBracket) {
171 if self.peek().is_none() {
172 return Err(SemaError::Reader {
173 message: "unterminated vector".to_string(),
174 span: open_span,
175 }
176 .with_hint("add a closing `]`"));
177 }
178 items.push(self.parse_expr()?);
179 }
180 self.expect(&Token::RBracket)?;
181 let close = self.prev_span();
182 let rc = Rc::new(items);
183 let ptr = Rc::as_ptr(&rc) as usize;
184 self.span_map.insert(ptr, open_span.to(&close));
185 Ok(Value::vector_from_rc(rc))
186 }
187
188 fn parse_map(&mut self) -> Result<Value, SemaError> {
189 let open_span = self.span();
190 self.expect(&Token::LBrace)?;
191 let mut map = BTreeMap::new();
192 while self.peek() != Some(&Token::RBrace) {
193 if self.peek().is_none() {
194 return Err(SemaError::Reader {
195 message: "unterminated map".to_string(),
196 span: open_span,
197 }
198 .with_hint("add a closing `}`"));
199 }
200 let key = self.parse_expr()?;
201 if self.peek() == Some(&Token::RBrace) || self.peek().is_none() {
202 return Err(SemaError::Reader {
203 message: "map literal must have even number of forms".to_string(),
204 span: self.span(),
205 });
206 }
207 let val = self.parse_expr()?;
208 map.insert(key, val);
209 }
210 self.expect(&Token::RBrace)?;
211 Ok(Value::map(map))
212 }
213
214 fn parse_bytevector(&mut self) -> Result<Value, SemaError> {
215 let open_span = self.span();
216 self.advance(); let mut bytes = Vec::new();
218 while self.peek() != Some(&Token::RParen) {
219 if self.peek().is_none() {
220 return Err(SemaError::Reader {
221 message: "unterminated bytevector".to_string(),
222 span: open_span,
223 }
224 .with_hint("add a closing `)`"));
225 }
226 let span = self.span();
227 match self.peek() {
228 Some(Token::Int(n)) => {
229 let n = *n;
230 self.advance();
231 if !(0..=255).contains(&n) {
232 return Err(SemaError::Reader {
233 message: format!("#u8(...): byte value {n} out of range 0..255"),
234 span,
235 });
236 }
237 bytes.push(n as u8);
238 }
239 _ => {
240 return Err(SemaError::Reader {
241 message: "#u8(...): expected integer byte value".to_string(),
242 span,
243 });
244 }
245 }
246 }
247 self.expect(&Token::RParen)?;
248 Ok(Value::bytevector(bytes))
249 }
250
251 fn parse_atom(&mut self) -> Result<Value, SemaError> {
252 let span = self.span();
253 match self.advance() {
254 Some(SpannedToken {
255 token: Token::Int(n),
256 ..
257 }) => Ok(Value::int(*n)),
258 Some(SpannedToken {
259 token: Token::Float(f),
260 ..
261 }) => Ok(Value::float(*f)),
262 Some(SpannedToken {
263 token: Token::String(s),
264 ..
265 }) => Ok(Value::string(s)),
266 Some(SpannedToken {
267 token: Token::Symbol(s),
268 ..
269 }) => {
270 if s == "nil" {
271 Ok(Value::nil())
272 } else {
273 Ok(Value::symbol(s))
274 }
275 }
276 Some(SpannedToken {
277 token: Token::Keyword(s),
278 ..
279 }) => Ok(Value::keyword(s)),
280 Some(SpannedToken {
281 token: Token::Bool(b),
282 ..
283 }) => Ok(Value::bool(*b)),
284 Some(SpannedToken {
285 token: Token::Char(c),
286 ..
287 }) => Ok(Value::char(*c)),
288 Some(t) => {
289 let (name, hint) = match &t.token {
290 Token::RParen => (
291 "unexpected closing `)`",
292 Some("no matching opening parenthesis"),
293 ),
294 Token::RBracket => (
295 "unexpected closing `]`",
296 Some("no matching opening bracket"),
297 ),
298 Token::RBrace => ("unexpected closing `}`", Some("no matching opening brace")),
299 Token::Dot => (
300 "unexpected `.`",
301 Some("dots are used in pair notation, e.g. (a . b)"),
302 ),
303 _ => ("unexpected token", None),
304 };
305 let err = SemaError::Reader {
306 message: name.to_string(),
307 span,
308 };
309 Err(if let Some(h) = hint {
310 err.with_hint(h)
311 } else {
312 err
313 })
314 }
315 None => Err(SemaError::Reader {
316 message: "unexpected end of input".to_string(),
317 span,
318 }),
319 }
320 }
321}
322
323fn token_display(tok: &Token) -> &'static str {
324 match tok {
325 Token::LParen => "(",
326 Token::RParen => ")",
327 Token::LBracket => "[",
328 Token::RBracket => "]",
329 Token::LBrace => "{",
330 Token::RBrace => "}",
331 Token::Quote => "'",
332 Token::Quasiquote => "`",
333 Token::Unquote => ",",
334 Token::UnquoteSplice => ",@",
335 Token::Dot => ".",
336 Token::BytevectorStart => "#u8(",
337 Token::Int(_) => "integer",
338 Token::Float(_) => "float",
339 Token::String(_) => "string",
340 Token::Symbol(_) => "symbol",
341 Token::Keyword(_) => "keyword",
342 Token::Bool(_) => "boolean",
343 Token::Char(_) => "character",
344 }
345}
346
347pub fn read(input: &str) -> Result<Value, SemaError> {
349 let tokens = tokenize(input)?;
350 if tokens.is_empty() {
351 return Ok(Value::nil());
352 }
353 let mut parser = Parser::new(tokens);
354 parser.parse_expr()
355}
356
357pub fn read_many(input: &str) -> Result<Vec<Value>, SemaError> {
359 let tokens = tokenize(input)?;
360 if tokens.is_empty() {
361 return Ok(Vec::new());
362 }
363 let mut parser = Parser::new(tokens);
364 let mut exprs = Vec::new();
365 while parser.peek().is_some() {
366 exprs.push(parser.parse_expr()?);
367 }
368 Ok(exprs)
369}
370
371pub fn read_many_with_spans(input: &str) -> Result<(Vec<Value>, SpanMap), SemaError> {
373 let tokens = tokenize(input)?;
374 if tokens.is_empty() {
375 return Ok((Vec::new(), SpanMap::new()));
376 }
377 let mut parser = Parser::new(tokens);
378 let mut exprs = Vec::new();
379 while parser.peek().is_some() {
380 exprs.push(parser.parse_expr()?);
381 }
382 Ok((exprs, parser.span_map))
383}
384
#[cfg(test)]
/// Unit tests for the reader entry points `read`, `read_many` and
/// `read_many_with_spans`.
mod tests {
    use super::*;

    #[test]
    fn test_read_int() {
        assert_eq!(read("42").unwrap(), Value::int(42));
    }

    #[test]
    fn test_read_negative_int() {
        assert_eq!(read("-7").unwrap(), Value::int(-7));
    }

    #[test]
    fn test_read_float() {
        assert_eq!(read("3.14").unwrap(), Value::float(3.14));
    }

    #[test]
    fn test_read_string() {
        assert_eq!(read("\"hello\"").unwrap(), Value::string("hello"));
    }

    #[test]
    fn test_read_symbol() {
        assert_eq!(read("foo").unwrap(), Value::symbol("foo"));
    }

    #[test]
    fn test_read_keyword() {
        assert_eq!(read(":bar").unwrap(), Value::keyword("bar"));
    }

    #[test]
    fn test_read_bool() {
        assert_eq!(read("#t").unwrap(), Value::bool(true));
        assert_eq!(read("#f").unwrap(), Value::bool(false));
    }

    #[test]
    fn test_read_list() {
        let result = read("(+ 1 2)").unwrap();
        assert_eq!(
            result,
            Value::list(vec![Value::symbol("+"), Value::int(1), Value::int(2)])
        );
    }

    #[test]
    fn test_read_nested_list() {
        let result = read("(* (+ 1 2) 3)").unwrap();
        assert_eq!(
            result,
            Value::list(vec![
                Value::symbol("*"),
                Value::list(vec![Value::symbol("+"), Value::int(1), Value::int(2)]),
                Value::int(3)
            ])
        );
    }

    #[test]
    fn test_read_vector() {
        let result = read("[1 2 3]").unwrap();
        assert_eq!(
            result,
            Value::vector(vec![Value::int(1), Value::int(2), Value::int(3)])
        );
    }

    #[test]
    fn test_read_map() {
        let result = read("{:a 1 :b 2}").unwrap();
        let mut expected = BTreeMap::new();
        expected.insert(Value::keyword("a"), Value::int(1));
        expected.insert(Value::keyword("b"), Value::int(2));
        assert_eq!(result, Value::map(expected));
    }

    #[test]
    fn test_read_quote() {
        let result = read("'foo").unwrap();
        assert_eq!(
            result,
            Value::list(vec![Value::symbol("quote"), Value::symbol("foo")])
        );
    }

    #[test]
    fn test_read_quasiquote() {
        let result = read("`(a ,b ,@c)").unwrap();
        assert_eq!(
            result,
            Value::list(vec![
                Value::symbol("quasiquote"),
                Value::list(vec![
                    Value::symbol("a"),
                    Value::list(vec![Value::symbol("unquote"), Value::symbol("b")]),
                    Value::list(vec![Value::symbol("unquote-splicing"), Value::symbol("c")]),
                ])
            ])
        );
    }

    #[test]
    fn test_read_nil() {
        assert_eq!(read("nil").unwrap(), Value::nil());
    }

    #[test]
    fn test_read_many_exprs() {
        let results = read_many("1 2 3").unwrap();
        assert_eq!(results, vec![Value::int(1), Value::int(2), Value::int(3)]);
    }

    #[test]
    fn test_comments() {
        let result = read_many("; comment\n(+ 1 2)").unwrap();
        assert_eq!(result.len(), 1);
    }

    #[test]
    fn test_read_zero() {
        assert_eq!(read("0").unwrap(), Value::int(0));
    }

    #[test]
    fn test_read_negative_zero() {
        assert_eq!(read("-0").unwrap(), Value::int(0));
    }

    #[test]
    fn test_read_leading_zeros() {
        assert_eq!(read("007").unwrap(), Value::int(7));
    }

    #[test]
    fn test_read_large_int() {
        assert_eq!(read("9999999999999").unwrap(), Value::int(9999999999999));
    }

    #[test]
    fn test_read_int_overflow() {
        // The literal is too large for any 64-bit integer.
        assert!(read("9999999999999999999999").is_err());
    }

    #[test]
    fn test_read_negative_float() {
        assert_eq!(read("-2.5").unwrap(), Value::float(-2.5));
    }

    #[test]
    fn test_read_float_leading_zero() {
        assert_eq!(read("0.5").unwrap(), Value::float(0.5));
    }

    #[test]
    fn test_read_minus_is_symbol() {
        // A lone `-` is a symbol, not the start of a number.
        assert_eq!(read("-").unwrap(), Value::symbol("-"));
    }

    #[test]
    fn test_read_minus_in_list() {
        let result = read("(- 3)").unwrap();
        // With a space, `-` and `3` are separate forms.
        assert_eq!(result, Value::list(vec![Value::symbol("-"), Value::int(3)]));
    }

    #[test]
    fn test_read_negative_in_list() {
        let result = read("(-3)").unwrap();
        // Without a space, `-3` is a single negative integer literal.
        assert_eq!(result, Value::list(vec![Value::int(-3)]));
    }

    #[test]
    fn test_read_empty_string() {
        assert_eq!(read(r#""""#).unwrap(), Value::string(""));
    }

    #[test]
    fn test_read_string_with_escapes() {
        assert_eq!(
            read(r#""\n\t\r\\\"" "#).unwrap(),
            Value::string("\n\t\r\\\"")
        );
    }

    #[test]
    fn test_read_string_unknown_escape() {
        // An unrecognized escape is kept verbatim, backslash included.
        assert_eq!(read(r#""\z""#).unwrap(), Value::string("\\z"));
    }

    #[test]
    fn test_read_string_with_newline() {
        assert_eq!(
            read("\"line1\nline2\"").unwrap(),
            Value::string("line1\nline2")
        );
    }

    #[test]
    fn test_read_unterminated_string() {
        assert!(read("\"hello").is_err());
    }

    #[test]
    fn test_read_string_escaped_quote_at_end() {
        // The final quote is escaped, so the string is never closed.
        assert!(read(r#""test\""#).is_err());
    }

    #[test]
    fn test_read_string_with_unicode() {
        assert_eq!(read("\"héllo\"").unwrap(), Value::string("héllo"));
        assert_eq!(read("\"日本語\"").unwrap(), Value::string("日本語"));
        assert_eq!(read("\"🎉\"").unwrap(), Value::string("🎉"));
    }

    #[test]
    fn test_read_string_with_parens() {
        assert_eq!(read("\"(+ 1 2)\"").unwrap(), Value::string("(+ 1 2)"));
    }

    #[test]
    fn test_read_operator_symbols() {
        assert_eq!(read("+").unwrap(), Value::symbol("+"));
        assert_eq!(read("*").unwrap(), Value::symbol("*"));
        assert_eq!(read("/").unwrap(), Value::symbol("/"));
        assert_eq!(read("<=").unwrap(), Value::symbol("<="));
        assert_eq!(read(">=").unwrap(), Value::symbol(">="));
    }

    #[test]
    fn test_read_predicate_symbols() {
        assert_eq!(read("null?").unwrap(), Value::symbol("null?"));
        assert_eq!(read("list?").unwrap(), Value::symbol("list?"));
    }

    #[test]
    fn test_read_arrow_symbols() {
        assert_eq!(
            read("string->symbol").unwrap(),
            Value::symbol("string->symbol")
        );
    }

    #[test]
    fn test_read_namespaced_symbols() {
        assert_eq!(read("file/read").unwrap(), Value::symbol("file/read"));
        assert_eq!(read("http/get").unwrap(), Value::symbol("http/get"));
    }

    #[test]
    fn test_read_true_false_as_bool() {
        assert_eq!(read("true").unwrap(), Value::bool(true));
        assert_eq!(read("false").unwrap(), Value::bool(false));
    }

    #[test]
    fn test_read_bare_colon_error() {
        // A colon with no name after it is rejected.
        assert!(read(":").is_err());
    }

    #[test]
    fn test_read_keyword_with_numbers() {
        assert_eq!(read(":foo123").unwrap(), Value::keyword("foo123"));
    }

    #[test]
    fn test_read_keyword_with_hyphens() {
        assert_eq!(read(":max-turns").unwrap(), Value::keyword("max-turns"));
    }

    #[test]
    fn test_read_hash_invalid() {
        assert!(read("#x").is_err());
        assert!(read("#").is_err());
    }

    #[test]
    fn test_read_empty() {
        assert_eq!(read("").unwrap(), Value::nil());
    }

    #[test]
    fn test_read_whitespace_only() {
        assert_eq!(read("  \n\t  ").unwrap(), Value::nil());
    }

    #[test]
    fn test_read_many_empty() {
        assert_eq!(read_many("").unwrap(), vec![]);
    }

    #[test]
    fn test_read_many_whitespace_only() {
        assert_eq!(read_many("  \n  ").unwrap(), vec![]);
    }

    #[test]
    fn test_read_comment_only() {
        assert_eq!(read_many("; just a comment").unwrap(), vec![]);
    }

    #[test]
    fn test_read_empty_list() {
        assert_eq!(read("()").unwrap(), Value::list(vec![]));
    }

    #[test]
    fn test_read_deeply_nested() {
        let result = read("((((42))))").unwrap();
        assert_eq!(
            result,
            Value::list(vec![Value::list(vec![Value::list(vec![Value::list(
                vec![Value::int(42)]
            )])])])
        );
    }

    #[test]
    fn test_read_unterminated_list() {
        assert!(read("(1 2").is_err());
    }

    #[test]
    fn test_read_extra_rparen() {
        // `read` stops after the first complete expression, so the stray `)`
        // that follows `42` is never examined and the read succeeds.
        let result = read("42)").unwrap();
        assert_eq!(result, Value::int(42));
    }

    #[test]
    fn test_read_dotted_pair() {
        let result = read("(a . b)").unwrap();
        assert_eq!(
            result,
            Value::list(vec![
                Value::symbol("a"),
                Value::symbol("."),
                Value::symbol("b")
            ])
        );
    }

    #[test]
    fn test_read_empty_vector() {
        assert_eq!(read("[]").unwrap(), Value::vector(vec![]));
    }

    #[test]
    fn test_read_unterminated_vector() {
        assert!(read("[1 2").is_err());
    }

    #[test]
    fn test_read_empty_map() {
        assert_eq!(read("{}").unwrap(), Value::map(BTreeMap::new()));
    }

    #[test]
    fn test_read_unterminated_map() {
        assert!(read("{:a 1").is_err());
    }

    #[test]
    fn test_read_map_odd_elements() {
        assert!(read("{:a 1 :b}").is_err());
    }

    #[test]
    fn test_read_map_duplicate_keys() {
        let result = read("{:a 1 :a 2}").unwrap();
        // The value read last for a repeated key wins.
        let mut expected = BTreeMap::new();
        expected.insert(Value::keyword("a"), Value::int(2));
        assert_eq!(result, Value::map(expected));
    }

    #[test]
    fn test_read_nested_quote() {
        let result = read("''foo").unwrap();
        assert_eq!(
            result,
            Value::list(vec![
                Value::symbol("quote"),
                Value::list(vec![Value::symbol("quote"), Value::symbol("foo")])
            ])
        );
    }

    #[test]
    fn test_read_quote_list() {
        let result = read("'(1 2 3)").unwrap();
        assert_eq!(
            result,
            Value::list(vec![
                Value::symbol("quote"),
                Value::list(vec![Value::int(1), Value::int(2), Value::int(3)])
            ])
        );
    }

    #[test]
    fn test_read_quote_at_eof() {
        assert!(read("'").is_err());
    }

    #[test]
    fn test_read_unquote_at_eof() {
        assert!(read(",").is_err());
    }

    #[test]
    fn test_read_unquote_splice_at_eof() {
        assert!(read(",@").is_err());
    }

    #[test]
    fn test_read_quasiquote_at_eof() {
        assert!(read("`").is_err());
    }

    #[test]
    fn test_read_comment_after_expr() {
        assert_eq!(read_many("42 ; comment").unwrap(), vec![Value::int(42)]);
    }

    #[test]
    fn test_read_multiple_comments() {
        let result = read_many("; first\n; second\n42").unwrap();
        assert_eq!(result, vec![Value::int(42)]);
    }

    #[test]
    fn test_read_comment_no_newline() {
        // A comment that runs to end of input without a trailing newline.
        assert_eq!(read_many("; comment").unwrap(), vec![]);
    }

    #[test]
    fn test_read_crlf_line_endings() {
        let result = read_many("1\r\n2\r\n3").unwrap();
        assert_eq!(result, vec![Value::int(1), Value::int(2), Value::int(3)]);
    }

    #[test]
    fn test_read_tabs_as_whitespace() {
        assert_eq!(
            read("(\t+\t1\t2\t)").unwrap(),
            Value::list(vec![Value::symbol("+"), Value::int(1), Value::int(2)])
        );
    }

    #[test]
    fn test_read_mixed_collections() {
        let result = read("([1 2] {:a 3})").unwrap();
        // A list holding a vector and a map.
        let mut map = BTreeMap::new();
        map.insert(Value::keyword("a"), Value::int(3));
        assert_eq!(
            result,
            Value::list(vec![
                Value::vector(vec![Value::int(1), Value::int(2)]),
                Value::map(map)
            ])
        );
    }

    #[test]
    fn test_read_many_mixed_types() {
        let result = read_many(r#"42 3.14 "hello" foo :bar #t nil"#).unwrap();
        assert_eq!(result.len(), 7);
        assert_eq!(result[0], Value::int(42));
        assert_eq!(result[1], Value::float(3.14));
        assert_eq!(result[2], Value::string("hello"));
        assert_eq!(result[3], Value::symbol("foo"));
        assert_eq!(result[4], Value::keyword("bar"));
        assert_eq!(result[5], Value::bool(true));
        assert_eq!(result[6], Value::nil());
    }

    #[test]
    fn test_span_map_tracks_lists() {
        let (exprs, spans) = read_many_with_spans("(+ 1 2)").unwrap();
        assert_eq!(exprs.len(), 1);
        let rc = exprs[0].as_list_rc().expect("expected list");
        // Spans are keyed by the address of the list's backing `Rc`.
        let ptr = Rc::as_ptr(&rc) as usize;
        let span = spans.get(&ptr).expect("list should have span");
        assert_eq!(span.line, 1);
        assert_eq!(span.col, 1);
    }

    #[test]
    fn test_span_map_multiline() {
        let (exprs, spans) = read_many_with_spans("(foo)\n(bar)").unwrap();
        assert_eq!(exprs.len(), 2);
        let rc = exprs[1].as_list_rc().expect("expected list");
        let ptr = Rc::as_ptr(&rc) as usize;
        let span = spans.get(&ptr).expect("second list should have span");
        assert_eq!(span.line, 2);
        assert_eq!(span.col, 1);
    }

    #[test]
    fn test_read_unexpected_char() {
        assert!(read("@").is_err());
        assert!(read("$").is_err());
    }

    #[test]
    fn test_read_char_literal() {
        assert_eq!(read("#\\a").unwrap(), Value::char('a'));
        assert_eq!(read("#\\Z").unwrap(), Value::char('Z'));
        assert_eq!(read("#\\0").unwrap(), Value::char('0'));
    }

    #[test]
    fn test_read_char_named() {
        assert_eq!(read("#\\space").unwrap(), Value::char(' '));
        assert_eq!(read("#\\newline").unwrap(), Value::char('\n'));
        assert_eq!(read("#\\tab").unwrap(), Value::char('\t'));
        assert_eq!(read("#\\return").unwrap(), Value::char('\r'));
        assert_eq!(read("#\\nul").unwrap(), Value::char('\0'));
    }

    #[test]
    fn test_read_char_special() {
        assert_eq!(read("#\\(").unwrap(), Value::char('('));
        assert_eq!(read("#\\)").unwrap(), Value::char(')'));
    }

    #[test]
    fn test_read_char_in_list() {
        let result = read("(#\\a #\\b)").unwrap();
        assert_eq!(
            result,
            Value::list(vec![Value::char('a'), Value::char('b')])
        );
    }

    #[test]
    fn test_read_char_unknown_name() {
        assert!(read("#\\foobar").is_err());
    }

    #[test]
    fn test_read_char_eof() {
        assert!(read("#\\").is_err());
    }

    #[test]
    fn test_read_bytevector_literal() {
        assert_eq!(
            read("#u8(1 2 3)").unwrap(),
            Value::bytevector(vec![1, 2, 3])
        );
    }

    #[test]
    fn test_read_bytevector_empty() {
        assert_eq!(read("#u8()").unwrap(), Value::bytevector(vec![]));
    }

    #[test]
    fn test_read_bytevector_single() {
        assert_eq!(read("#u8(255)").unwrap(), Value::bytevector(vec![255]));
    }

    #[test]
    fn test_read_bytevector_out_of_range() {
        assert!(read("#u8(256)").is_err());
    }

    #[test]
    fn test_read_bytevector_negative() {
        assert!(read("#u8(-1)").is_err());
    }

    #[test]
    fn test_read_bytevector_non_integer() {
        assert!(read("#u8(1.5)").is_err());
    }

    #[test]
    fn test_read_bytevector_unterminated() {
        assert!(read("#u8(1 2").is_err());
    }

    #[test]
    fn test_read_bytevector_in_list() {
        let result = read("(#u8(1 2) #u8(3))").unwrap();
        assert_eq!(
            result,
            Value::list(vec![
                Value::bytevector(vec![1, 2]),
                Value::bytevector(vec![3]),
            ])
        );
    }

    #[test]
    fn test_read_string_hex_escape_basic() {
        let result = read(r#""\x41;""#).unwrap();
        // `\x41;` is the code point U+0041, i.e. `A`.
        assert_eq!(result, Value::string("A"));
    }

    #[test]
    fn test_read_string_hex_escape_lowercase() {
        let result = read(r#""\x6c;""#).unwrap();
        assert_eq!(result, Value::string("l"));
    }

    #[test]
    fn test_read_string_hex_escape_mixed_case() {
        let result = read(r#""\x4F;""#).unwrap();
        assert_eq!(result, Value::string("O"));
    }

    #[test]
    fn test_read_string_hex_escape_esc_char() {
        let result = read(r#""\x1B;""#).unwrap();
        // U+001B is the ASCII escape control character.
        assert_eq!(result, Value::string("\x1B"));
    }

    #[test]
    fn test_read_string_hex_escape_null() {
        let result = read(r#""\x0;""#).unwrap();
        assert_eq!(result, Value::string("\0"));
    }

    #[test]
    fn test_read_string_hex_escape_unicode() {
        let result = read(r#""\x3BB;""#).unwrap();
        // U+03BB is the Greek small letter lambda.
        assert_eq!(result, Value::string("λ"));
    }

    #[test]
    fn test_read_string_hex_escape_emoji() {
        let result = read(r#""\x1F600;""#).unwrap();
        // A code point outside the Basic Multilingual Plane.
        assert_eq!(result, Value::string("😀"));
    }

    #[test]
    fn test_read_string_hex_escape_in_context() {
        let result = read(r#""hello\x20;world""#).unwrap();
        // U+0020 is a space.
        assert_eq!(result, Value::string("hello world"));
    }

    #[test]
    fn test_read_string_hex_escape_multiple() {
        let result = read(r#""\x48;\x69;""#).unwrap();
        assert_eq!(result, Value::string("Hi"));
    }

    #[test]
    fn test_read_string_hex_escape_missing_semicolon() {
        assert!(read(r#""\x41""#).is_err());
    }

    #[test]
    fn test_read_string_hex_escape_no_digits() {
        assert!(read(r#""\x;""#).is_err());
    }

    #[test]
    fn test_read_string_hex_escape_invalid_hex() {
        assert!(read(r#""\xGG;""#).is_err());
    }

    #[test]
    fn test_read_string_hex_escape_invalid_codepoint() {
        // U+D800 is a surrogate, which is not a valid `char`.
        assert!(read(r#""\xD800;""#).is_err());
    }

    #[test]
    fn test_read_string_hex_escape_too_large() {
        // U+110000 is one past the largest Unicode code point.
        assert!(read(r#""\x110000;""#).is_err());
    }

    #[test]
    fn test_read_string_u_escape_basic() {
        let result = read(r#""\u0041""#).unwrap();
        // `\u` takes four hex digits and no terminating `;`.
        assert_eq!(result, Value::string("A"));
    }

    #[test]
    fn test_read_string_u_escape_lambda() {
        let result = read(r#""\u03BB""#).unwrap();
        assert_eq!(result, Value::string("λ"));
    }

    #[test]
    fn test_read_string_u_escape_esc() {
        let result = read(r#""\u001B""#).unwrap();
        assert_eq!(result, Value::string("\x1B"));
    }

    #[test]
    fn test_read_string_u_escape_too_few_digits() {
        assert!(read(r#""\u041""#).is_err());
    }

    #[test]
    fn test_read_string_u_escape_surrogate() {
        assert!(read(r#""\uD800""#).is_err());
    }

    #[test]
    fn test_read_string_big_u_escape_basic() {
        let result = read(r#""\U00000041""#).unwrap();
        assert_eq!(result, Value::string("A"));
    }

    #[test]
    fn test_read_string_big_u_escape_emoji() {
        let result = read(r#""\U0001F600""#).unwrap();
        assert_eq!(result, Value::string("😀"));
    }

    #[test]
    fn test_read_string_big_u_escape_too_few_digits() {
        assert!(read(r#""\U0041""#).is_err());
    }

    #[test]
    fn test_read_string_big_u_escape_invalid() {
        assert!(read(r#""\U00110000""#).is_err());
    }

    #[test]
    fn test_read_string_null_escape() {
        let result = read(r#""\0""#).unwrap();
        assert_eq!(result, Value::string("\0"));
    }

    #[test]
    fn test_read_string_mixed_escapes() {
        let result = read(r#""\x48;\u0069\n\t""#).unwrap();
        // Hex, `\u` and single-character escapes combined in one literal.
        assert_eq!(result, Value::string("Hi\n\t"));
    }

    #[test]
    fn test_read_string_ansi_escape_sequence() {
        let result = read(r#""\x1B;[31mRed\x1B;[0m""#).unwrap();
        // ESC-prefixed terminal colour codes survive the round trip.
        assert_eq!(result, Value::string("\x1B[31mRed\x1B[0m"));
    }
}