1use std::collections::BTreeMap;
2use std::rc::Rc;
3
4use sema_core::{SemaError, Span, SpanMap, Value};
5
6use crate::lexer::{tokenize, SpannedToken, Token};
7
8struct Parser {
9 tokens: Vec<SpannedToken>,
10 pos: usize,
11 span_map: SpanMap,
12}
13
14impl Parser {
15 fn new(tokens: Vec<SpannedToken>) -> Self {
16 Parser {
17 tokens,
18 pos: 0,
19 span_map: SpanMap::new(),
20 }
21 }
22
23 fn peek(&self) -> Option<&Token> {
24 self.tokens.get(self.pos).map(|t| &t.token)
25 }
26
27 fn span(&self) -> Span {
28 self.tokens
29 .get(self.pos)
30 .map(|t| t.span)
31 .unwrap_or(Span { line: 0, col: 0 })
32 }
33
34 fn advance(&mut self) -> Option<&SpannedToken> {
35 let tok = self.tokens.get(self.pos);
36 if tok.is_some() {
37 self.pos += 1;
38 }
39 tok
40 }
41
42 fn expect(&mut self, expected: &Token) -> Result<(), SemaError> {
43 let span = self.span();
44 match self.advance() {
45 Some(t) if &t.token == expected => Ok(()),
46 Some(t) => Err(SemaError::Reader {
47 message: format!(
48 "expected `{}`, got `{}`",
49 token_display(expected),
50 token_display(&t.token)
51 ),
52 span,
53 }),
54 None => Err(SemaError::Reader {
55 message: format!("expected `{}`, got end of input", token_display(expected)),
56 span,
57 }),
58 }
59 }
60
61 fn parse_expr(&mut self) -> Result<Value, SemaError> {
62 let span = self.span();
63 match self.peek() {
64 None => Err(SemaError::Reader {
65 message: "unexpected end of input".to_string(),
66 span,
67 }),
68 Some(Token::LParen) => self.parse_list(),
69 Some(Token::LBracket) => self.parse_vector(),
70 Some(Token::LBrace) => self.parse_map(),
71 Some(Token::Quote) => {
72 self.advance();
73 let inner = self.parse_expr().map_err(|_| {
74 SemaError::Reader {
75 message: "quote (') requires an expression after it".to_string(),
76 span,
77 }
78 .with_hint("e.g. '(1 2 3) or 'foo")
79 })?;
80 self.make_list_with_span(vec![Value::symbol("quote"), inner], span)
81 }
82 Some(Token::Quasiquote) => {
83 self.advance();
84 let inner = self.parse_expr().map_err(|_| {
85 SemaError::Reader {
86 message: "quasiquote (`) requires an expression after it".to_string(),
87 span,
88 }
89 .with_hint("e.g. `(list ,x)")
90 })?;
91 self.make_list_with_span(vec![Value::symbol("quasiquote"), inner], span)
92 }
93 Some(Token::Unquote) => {
94 self.advance();
95 let inner = self.parse_expr().map_err(|_| {
96 SemaError::Reader {
97 message: "unquote (,) requires an expression after it".to_string(),
98 span,
99 }
100 .with_hint("use inside quasiquote, e.g. `(list ,x)")
101 })?;
102 self.make_list_with_span(vec![Value::symbol("unquote"), inner], span)
103 }
104 Some(Token::UnquoteSplice) => {
105 self.advance();
106 let inner = self.parse_expr().map_err(|_| {
107 SemaError::Reader {
108 message: "unquote-splicing (,@) requires an expression after it"
109 .to_string(),
110 span,
111 }
112 .with_hint("use inside quasiquote, e.g. `(list ,@xs)")
113 })?;
114 self.make_list_with_span(vec![Value::symbol("unquote-splicing"), inner], span)
115 }
116 Some(Token::BytevectorStart) => self.parse_bytevector(),
117 Some(_) => self.parse_atom(),
118 }
119 }
120
121 fn make_list_with_span(&mut self, items: Vec<Value>, span: Span) -> Result<Value, SemaError> {
122 let rc = Rc::new(items);
123 let ptr = Rc::as_ptr(&rc) as usize;
124 self.span_map.insert(ptr, span);
125 Ok(Value::list_from_rc(rc))
126 }
127
128 fn parse_list(&mut self) -> Result<Value, SemaError> {
129 let open_span = self.span();
130 self.expect(&Token::LParen)?;
131 let mut items = Vec::new();
132 while self.peek() != Some(&Token::RParen) {
133 if self.peek().is_none() {
134 return Err(SemaError::Reader {
135 message: "unterminated list".to_string(),
136 span: open_span,
137 }
138 .with_hint("add a closing `)`"));
139 }
140 if self.peek() == Some(&Token::Dot) {
142 self.advance(); let cdr = self.parse_expr()?;
144 self.expect(&Token::RParen)?;
145 items.push(Value::symbol("."));
146 items.push(cdr);
147 return self.make_list_with_span(items, open_span);
148 }
149 items.push(self.parse_expr()?);
150 }
151 self.expect(&Token::RParen)?;
152 self.make_list_with_span(items, open_span)
153 }
154
155 fn parse_vector(&mut self) -> Result<Value, SemaError> {
156 let open_span = self.span();
157 self.expect(&Token::LBracket)?;
158 let mut items = Vec::new();
159 while self.peek() != Some(&Token::RBracket) {
160 if self.peek().is_none() {
161 return Err(SemaError::Reader {
162 message: "unterminated vector".to_string(),
163 span: open_span,
164 }
165 .with_hint("add a closing `]`"));
166 }
167 items.push(self.parse_expr()?);
168 }
169 self.expect(&Token::RBracket)?;
170 let rc = Rc::new(items);
171 let ptr = Rc::as_ptr(&rc) as usize;
172 self.span_map.insert(ptr, open_span);
173 Ok(Value::vector_from_rc(rc))
174 }
175
176 fn parse_map(&mut self) -> Result<Value, SemaError> {
177 let open_span = self.span();
178 self.expect(&Token::LBrace)?;
179 let mut map = BTreeMap::new();
180 while self.peek() != Some(&Token::RBrace) {
181 if self.peek().is_none() {
182 return Err(SemaError::Reader {
183 message: "unterminated map".to_string(),
184 span: open_span,
185 }
186 .with_hint("add a closing `}`"));
187 }
188 let key = self.parse_expr()?;
189 if self.peek() == Some(&Token::RBrace) || self.peek().is_none() {
190 return Err(SemaError::Reader {
191 message: "map literal must have even number of forms".to_string(),
192 span: self.span(),
193 });
194 }
195 let val = self.parse_expr()?;
196 map.insert(key, val);
197 }
198 self.expect(&Token::RBrace)?;
199 Ok(Value::map(map))
200 }
201
202 fn parse_bytevector(&mut self) -> Result<Value, SemaError> {
203 let open_span = self.span();
204 self.advance(); let mut bytes = Vec::new();
206 while self.peek() != Some(&Token::RParen) {
207 if self.peek().is_none() {
208 return Err(SemaError::Reader {
209 message: "unterminated bytevector".to_string(),
210 span: open_span,
211 }
212 .with_hint("add a closing `)`"));
213 }
214 let span = self.span();
215 match self.peek() {
216 Some(Token::Int(n)) => {
217 let n = *n;
218 self.advance();
219 if !(0..=255).contains(&n) {
220 return Err(SemaError::Reader {
221 message: format!("#u8(...): byte value {n} out of range 0..255"),
222 span,
223 });
224 }
225 bytes.push(n as u8);
226 }
227 _ => {
228 return Err(SemaError::Reader {
229 message: "#u8(...): expected integer byte value".to_string(),
230 span,
231 });
232 }
233 }
234 }
235 self.expect(&Token::RParen)?;
236 Ok(Value::bytevector(bytes))
237 }
238
239 fn parse_atom(&mut self) -> Result<Value, SemaError> {
240 let span = self.span();
241 match self.advance() {
242 Some(SpannedToken {
243 token: Token::Int(n),
244 ..
245 }) => Ok(Value::int(*n)),
246 Some(SpannedToken {
247 token: Token::Float(f),
248 ..
249 }) => Ok(Value::float(*f)),
250 Some(SpannedToken {
251 token: Token::String(s),
252 ..
253 }) => Ok(Value::string(s)),
254 Some(SpannedToken {
255 token: Token::Symbol(s),
256 ..
257 }) => {
258 if s == "nil" {
259 Ok(Value::nil())
260 } else {
261 Ok(Value::symbol(s))
262 }
263 }
264 Some(SpannedToken {
265 token: Token::Keyword(s),
266 ..
267 }) => Ok(Value::keyword(s)),
268 Some(SpannedToken {
269 token: Token::Bool(b),
270 ..
271 }) => Ok(Value::bool(*b)),
272 Some(SpannedToken {
273 token: Token::Char(c),
274 ..
275 }) => Ok(Value::char(*c)),
276 Some(t) => {
277 let (name, hint) = match &t.token {
278 Token::RParen => (
279 "unexpected closing `)`",
280 Some("no matching opening parenthesis"),
281 ),
282 Token::RBracket => (
283 "unexpected closing `]`",
284 Some("no matching opening bracket"),
285 ),
286 Token::RBrace => ("unexpected closing `}`", Some("no matching opening brace")),
287 Token::Dot => (
288 "unexpected `.`",
289 Some("dots are used in pair notation, e.g. (a . b)"),
290 ),
291 _ => ("unexpected token", None),
292 };
293 let err = SemaError::Reader {
294 message: name.to_string(),
295 span,
296 };
297 Err(if let Some(h) = hint {
298 err.with_hint(h)
299 } else {
300 err
301 })
302 }
303 None => Err(SemaError::Reader {
304 message: "unexpected end of input".to_string(),
305 span,
306 }),
307 }
308 }
309}
310
311fn token_display(tok: &Token) -> &'static str {
312 match tok {
313 Token::LParen => "(",
314 Token::RParen => ")",
315 Token::LBracket => "[",
316 Token::RBracket => "]",
317 Token::LBrace => "{",
318 Token::RBrace => "}",
319 Token::Quote => "'",
320 Token::Quasiquote => "`",
321 Token::Unquote => ",",
322 Token::UnquoteSplice => ",@",
323 Token::Dot => ".",
324 Token::BytevectorStart => "#u8(",
325 Token::Int(_) => "integer",
326 Token::Float(_) => "float",
327 Token::String(_) => "string",
328 Token::Symbol(_) => "symbol",
329 Token::Keyword(_) => "keyword",
330 Token::Bool(_) => "boolean",
331 Token::Char(_) => "character",
332 }
333}
334
335pub fn read(input: &str) -> Result<Value, SemaError> {
337 let tokens = tokenize(input)?;
338 if tokens.is_empty() {
339 return Ok(Value::nil());
340 }
341 let mut parser = Parser::new(tokens);
342 parser.parse_expr()
343}
344
345pub fn read_many(input: &str) -> Result<Vec<Value>, SemaError> {
347 let tokens = tokenize(input)?;
348 if tokens.is_empty() {
349 return Ok(Vec::new());
350 }
351 let mut parser = Parser::new(tokens);
352 let mut exprs = Vec::new();
353 while parser.peek().is_some() {
354 exprs.push(parser.parse_expr()?);
355 }
356 Ok(exprs)
357}
358
359pub fn read_many_with_spans(input: &str) -> Result<(Vec<Value>, SpanMap), SemaError> {
361 let tokens = tokenize(input)?;
362 if tokens.is_empty() {
363 return Ok((Vec::new(), SpanMap::new()));
364 }
365 let mut parser = Parser::new(tokens);
366 let mut exprs = Vec::new();
367 while parser.peek().is_some() {
368 exprs.push(parser.parse_expr()?);
369 }
370 Ok((exprs, parser.span_map))
371}
372
/// Unit tests for the reader: atoms, collections, reader macros, comments and
/// whitespace, span tracking, character and bytevector literals, and string
/// escape sequences.
#[cfg(test)]
mod tests {
    use super::*;

    // ---- Atoms ----

    #[test]
    fn test_read_int() {
        assert_eq!(read("42").unwrap(), Value::int(42));
    }

    #[test]
    fn test_read_negative_int() {
        assert_eq!(read("-7").unwrap(), Value::int(-7));
    }

    #[test]
    fn test_read_float() {
        assert_eq!(read("3.14").unwrap(), Value::float(3.14));
    }

    #[test]
    fn test_read_string() {
        assert_eq!(read("\"hello\"").unwrap(), Value::string("hello"));
    }

    #[test]
    fn test_read_symbol() {
        assert_eq!(read("foo").unwrap(), Value::symbol("foo"));
    }

    #[test]
    fn test_read_keyword() {
        assert_eq!(read(":bar").unwrap(), Value::keyword("bar"));
    }

    #[test]
    fn test_read_bool() {
        assert_eq!(read("#t").unwrap(), Value::bool(true));
        assert_eq!(read("#f").unwrap(), Value::bool(false));
    }

    // ---- Basic collections and reader macros ----

    #[test]
    fn test_read_list() {
        let result = read("(+ 1 2)").unwrap();
        assert_eq!(
            result,
            Value::list(vec![Value::symbol("+"), Value::int(1), Value::int(2)])
        );
    }

    #[test]
    fn test_read_nested_list() {
        let result = read("(* (+ 1 2) 3)").unwrap();
        assert_eq!(
            result,
            Value::list(vec![
                Value::symbol("*"),
                Value::list(vec![Value::symbol("+"), Value::int(1), Value::int(2)]),
                Value::int(3)
            ])
        );
    }

    #[test]
    fn test_read_vector() {
        let result = read("[1 2 3]").unwrap();
        assert_eq!(
            result,
            Value::vector(vec![Value::int(1), Value::int(2), Value::int(3)])
        );
    }

    #[test]
    fn test_read_map() {
        let result = read("{:a 1 :b 2}").unwrap();
        let mut expected = BTreeMap::new();
        expected.insert(Value::keyword("a"), Value::int(1));
        expected.insert(Value::keyword("b"), Value::int(2));
        assert_eq!(result, Value::map(expected));
    }

    #[test]
    fn test_read_quote() {
        let result = read("'foo").unwrap();
        assert_eq!(
            result,
            Value::list(vec![Value::symbol("quote"), Value::symbol("foo")])
        );
    }

    #[test]
    fn test_read_quasiquote() {
        let result = read("`(a ,b ,@c)").unwrap();
        assert_eq!(
            result,
            Value::list(vec![
                Value::symbol("quasiquote"),
                Value::list(vec![
                    Value::symbol("a"),
                    Value::list(vec![Value::symbol("unquote"), Value::symbol("b")]),
                    Value::list(vec![Value::symbol("unquote-splicing"), Value::symbol("c")]),
                ])
            ])
        );
    }

    #[test]
    fn test_read_nil() {
        assert_eq!(read("nil").unwrap(), Value::nil());
    }

    #[test]
    fn test_read_many_exprs() {
        let results = read_many("1 2 3").unwrap();
        assert_eq!(results, vec![Value::int(1), Value::int(2), Value::int(3)]);
    }

    #[test]
    fn test_comments() {
        let result = read_many("; comment\n(+ 1 2)").unwrap();
        assert_eq!(result.len(), 1);
    }

    // ---- Numeric edge cases ----

    #[test]
    fn test_read_zero() {
        assert_eq!(read("0").unwrap(), Value::int(0));
    }

    #[test]
    fn test_read_negative_zero() {
        assert_eq!(read("-0").unwrap(), Value::int(0));
    }

    #[test]
    fn test_read_leading_zeros() {
        assert_eq!(read("007").unwrap(), Value::int(7));
    }

    #[test]
    fn test_read_large_int() {
        assert_eq!(read("9999999999999").unwrap(), Value::int(9999999999999));
    }

    #[test]
    fn test_read_int_overflow() {
        // A literal too large for the integer type must be rejected.
        assert!(read("9999999999999999999999").is_err());
    }

    #[test]
    fn test_read_negative_float() {
        assert_eq!(read("-2.5").unwrap(), Value::float(-2.5));
    }

    #[test]
    fn test_read_float_leading_zero() {
        assert_eq!(read("0.5").unwrap(), Value::float(0.5));
    }

    #[test]
    fn test_read_minus_is_symbol() {
        // A lone minus sign is a symbol, not a number.
        assert_eq!(read("-").unwrap(), Value::symbol("-"));
    }

    #[test]
    fn test_read_minus_in_list() {
        // Minus separated from the digit by whitespace stays a symbol.
        let result = read("(- 3)").unwrap();
        assert_eq!(result, Value::list(vec![Value::symbol("-"), Value::int(3)]));
    }

    #[test]
    fn test_read_negative_in_list() {
        // Minus directly attached to the digit forms a negative literal.
        let result = read("(-3)").unwrap();
        assert_eq!(result, Value::list(vec![Value::int(-3)]));
    }

    // ---- String edge cases ----

    #[test]
    fn test_read_empty_string() {
        assert_eq!(read(r#""""#).unwrap(), Value::string(""));
    }

    #[test]
    fn test_read_string_with_escapes() {
        assert_eq!(
            read(r#""\n\t\r\\\"" "#).unwrap(),
            Value::string("\n\t\r\\\"")
        );
    }

    #[test]
    fn test_read_string_unknown_escape() {
        // An unrecognized escape keeps both the backslash and the character.
        assert_eq!(read(r#""\z""#).unwrap(), Value::string("\\z"));
    }

    #[test]
    fn test_read_string_with_newline() {
        assert_eq!(
            read("\"line1\nline2\"").unwrap(),
            Value::string("line1\nline2")
        );
    }

    #[test]
    fn test_read_unterminated_string() {
        assert!(read("\"hello").is_err());
    }

    #[test]
    fn test_read_string_escaped_quote_at_end() {
        // The final quote is escaped, so the string is never closed.
        assert!(read(r#""test\""#).is_err());
    }

    #[test]
    fn test_read_string_with_unicode() {
        assert_eq!(read("\"héllo\"").unwrap(), Value::string("héllo"));
        assert_eq!(read("\"日本語\"").unwrap(), Value::string("日本語"));
        assert_eq!(read("\"🎉\"").unwrap(), Value::string("🎉"));
    }

    #[test]
    fn test_read_string_with_parens() {
        assert_eq!(read("\"(+ 1 2)\"").unwrap(), Value::string("(+ 1 2)"));
    }

    // ---- Symbols and keywords ----

    #[test]
    fn test_read_operator_symbols() {
        assert_eq!(read("+").unwrap(), Value::symbol("+"));
        assert_eq!(read("*").unwrap(), Value::symbol("*"));
        assert_eq!(read("/").unwrap(), Value::symbol("/"));
        assert_eq!(read("<=").unwrap(), Value::symbol("<="));
        assert_eq!(read(">=").unwrap(), Value::symbol(">="));
    }

    #[test]
    fn test_read_predicate_symbols() {
        assert_eq!(read("null?").unwrap(), Value::symbol("null?"));
        assert_eq!(read("list?").unwrap(), Value::symbol("list?"));
    }

    #[test]
    fn test_read_arrow_symbols() {
        assert_eq!(
            read("string->symbol").unwrap(),
            Value::symbol("string->symbol")
        );
    }

    #[test]
    fn test_read_namespaced_symbols() {
        assert_eq!(read("file/read").unwrap(), Value::symbol("file/read"));
        assert_eq!(read("http/get").unwrap(), Value::symbol("http/get"));
    }

    #[test]
    fn test_read_true_false_as_bool() {
        assert_eq!(read("true").unwrap(), Value::bool(true));
        assert_eq!(read("false").unwrap(), Value::bool(false));
    }

    #[test]
    fn test_read_bare_colon_error() {
        // A colon with no name after it is not a valid keyword.
        assert!(read(":").is_err());
    }

    #[test]
    fn test_read_keyword_with_numbers() {
        assert_eq!(read(":foo123").unwrap(), Value::keyword("foo123"));
    }

    #[test]
    fn test_read_keyword_with_hyphens() {
        assert_eq!(read(":max-turns").unwrap(), Value::keyword("max-turns"));
    }

    #[test]
    fn test_read_hash_invalid() {
        assert!(read("#x").is_err());
        assert!(read("#").is_err());
    }

    // ---- Empty input ----

    #[test]
    fn test_read_empty() {
        assert_eq!(read("").unwrap(), Value::nil());
    }

    #[test]
    fn test_read_whitespace_only() {
        assert_eq!(read("  \n\t  ").unwrap(), Value::nil());
    }

    #[test]
    fn test_read_many_empty() {
        assert_eq!(read_many("").unwrap(), vec![]);
    }

    #[test]
    fn test_read_many_whitespace_only() {
        assert_eq!(read_many("  \n  ").unwrap(), vec![]);
    }

    #[test]
    fn test_read_comment_only() {
        assert_eq!(read_many("; just a comment").unwrap(), vec![]);
    }

    // ---- List edge cases ----

    #[test]
    fn test_read_empty_list() {
        assert_eq!(read("()").unwrap(), Value::list(vec![]));
    }

    #[test]
    fn test_read_deeply_nested() {
        let result = read("((((42))))").unwrap();
        assert_eq!(
            result,
            Value::list(vec![Value::list(vec![Value::list(vec![Value::list(
                vec![Value::int(42)]
            )])])])
        );
    }

    #[test]
    fn test_read_unterminated_list() {
        assert!(read("(1 2").is_err());
    }

    #[test]
    fn test_read_extra_rparen() {
        // `read` parses a single expression and never looks at the stray `)`.
        let result = read("42)").unwrap();
        assert_eq!(result, Value::int(42));
        // `read_many` keeps parsing, reaches the unmatched `)` and rejects it.
        assert!(read_many("42)").is_err());
    }

    #[test]
    fn test_read_dotted_pair() {
        // The dot is kept in the list as the symbol `.`.
        let result = read("(a . b)").unwrap();
        assert_eq!(
            result,
            Value::list(vec![
                Value::symbol("a"),
                Value::symbol("."),
                Value::symbol("b")
            ])
        );
    }

    // ---- Vector and map edge cases ----

    #[test]
    fn test_read_empty_vector() {
        assert_eq!(read("[]").unwrap(), Value::vector(vec![]));
    }

    #[test]
    fn test_read_unterminated_vector() {
        assert!(read("[1 2").is_err());
    }

    #[test]
    fn test_read_empty_map() {
        assert_eq!(read("{}").unwrap(), Value::map(BTreeMap::new()));
    }

    #[test]
    fn test_read_unterminated_map() {
        assert!(read("{:a 1").is_err());
    }

    #[test]
    fn test_read_map_odd_elements() {
        assert!(read("{:a 1 :b}").is_err());
    }

    #[test]
    fn test_read_map_duplicate_keys() {
        // When a key repeats, the value written last wins.
        let result = read("{:a 1 :a 2}").unwrap();
        let mut expected = BTreeMap::new();
        expected.insert(Value::keyword("a"), Value::int(2));
        assert_eq!(result, Value::map(expected));
    }

    // ---- Quote edge cases ----

    #[test]
    fn test_read_nested_quote() {
        let result = read("''foo").unwrap();
        assert_eq!(
            result,
            Value::list(vec![
                Value::symbol("quote"),
                Value::list(vec![Value::symbol("quote"), Value::symbol("foo")])
            ])
        );
    }

    #[test]
    fn test_read_quote_list() {
        let result = read("'(1 2 3)").unwrap();
        assert_eq!(
            result,
            Value::list(vec![
                Value::symbol("quote"),
                Value::list(vec![Value::int(1), Value::int(2), Value::int(3)])
            ])
        );
    }

    #[test]
    fn test_read_quote_at_eof() {
        assert!(read("'").is_err());
    }

    #[test]
    fn test_read_unquote_at_eof() {
        assert!(read(",").is_err());
    }

    #[test]
    fn test_read_unquote_splice_at_eof() {
        assert!(read(",@").is_err());
    }

    #[test]
    fn test_read_quasiquote_at_eof() {
        assert!(read("`").is_err());
    }

    // ---- Comments and whitespace ----

    #[test]
    fn test_read_comment_after_expr() {
        assert_eq!(read_many("42 ; comment").unwrap(), vec![Value::int(42)]);
    }

    #[test]
    fn test_read_multiple_comments() {
        let result = read_many("; first\n; second\n42").unwrap();
        assert_eq!(result, vec![Value::int(42)]);
    }

    #[test]
    fn test_read_comment_no_newline() {
        // A comment that runs to end of input without a trailing newline.
        assert_eq!(read_many("; comment").unwrap(), vec![]);
    }

    #[test]
    fn test_read_crlf_line_endings() {
        let result = read_many("1\r\n2\r\n3").unwrap();
        assert_eq!(result, vec![Value::int(1), Value::int(2), Value::int(3)]);
    }

    #[test]
    fn test_read_tabs_as_whitespace() {
        assert_eq!(
            read("(\t+\t1\t2\t)").unwrap(),
            Value::list(vec![Value::symbol("+"), Value::int(1), Value::int(2)])
        );
    }

    // ---- Mixed forms ----

    #[test]
    fn test_read_mixed_collections() {
        let result = read("([1 2] {:a 3})").unwrap();
        let mut map = BTreeMap::new();
        map.insert(Value::keyword("a"), Value::int(3));
        assert_eq!(
            result,
            Value::list(vec![
                Value::vector(vec![Value::int(1), Value::int(2)]),
                Value::map(map)
            ])
        );
    }

    #[test]
    fn test_read_many_mixed_types() {
        let result = read_many(r#"42 3.14 "hello" foo :bar #t nil"#).unwrap();
        assert_eq!(result.len(), 7);
        assert_eq!(result[0], Value::int(42));
        assert_eq!(result[1], Value::float(3.14));
        assert_eq!(result[2], Value::string("hello"));
        assert_eq!(result[3], Value::symbol("foo"));
        assert_eq!(result[4], Value::keyword("bar"));
        assert_eq!(result[5], Value::bool(true));
        assert_eq!(result[6], Value::nil());
    }

    // ---- Span tracking ----

    #[test]
    fn test_span_map_tracks_lists() {
        let (exprs, spans) = read_many_with_spans("(+ 1 2)").unwrap();
        assert_eq!(exprs.len(), 1);
        let rc = exprs[0].as_list_rc().expect("expected list");
        // Spans are keyed by the address of the list's backing allocation.
        let ptr = Rc::as_ptr(&rc) as usize;
        let span = spans.get(&ptr).expect("list should have span");
        assert_eq!(span.line, 1);
        assert_eq!(span.col, 1);
    }

    #[test]
    fn test_span_map_multiline() {
        let (exprs, spans) = read_many_with_spans("(foo)\n(bar)").unwrap();
        assert_eq!(exprs.len(), 2);
        let rc = exprs[1].as_list_rc().expect("expected list");
        let ptr = Rc::as_ptr(&rc) as usize;
        let span = spans.get(&ptr).expect("second list should have span");
        assert_eq!(span.line, 2);
        assert_eq!(span.col, 1);
    }

    #[test]
    fn test_read_unexpected_char() {
        assert!(read("@").is_err());
        assert!(read("$").is_err());
    }

    // ---- Character literals ----

    #[test]
    fn test_read_char_literal() {
        assert_eq!(read("#\\a").unwrap(), Value::char('a'));
        assert_eq!(read("#\\Z").unwrap(), Value::char('Z'));
        assert_eq!(read("#\\0").unwrap(), Value::char('0'));
    }

    #[test]
    fn test_read_char_named() {
        assert_eq!(read("#\\space").unwrap(), Value::char(' '));
        assert_eq!(read("#\\newline").unwrap(), Value::char('\n'));
        assert_eq!(read("#\\tab").unwrap(), Value::char('\t'));
        assert_eq!(read("#\\return").unwrap(), Value::char('\r'));
        assert_eq!(read("#\\nul").unwrap(), Value::char('\0'));
    }

    #[test]
    fn test_read_char_special() {
        assert_eq!(read("#\\(").unwrap(), Value::char('('));
        assert_eq!(read("#\\)").unwrap(), Value::char(')'));
    }

    #[test]
    fn test_read_char_in_list() {
        let result = read("(#\\a #\\b)").unwrap();
        assert_eq!(
            result,
            Value::list(vec![Value::char('a'), Value::char('b')])
        );
    }

    #[test]
    fn test_read_char_unknown_name() {
        assert!(read("#\\foobar").is_err());
    }

    #[test]
    fn test_read_char_eof() {
        assert!(read("#\\").is_err());
    }

    // ---- Bytevector literals ----

    #[test]
    fn test_read_bytevector_literal() {
        assert_eq!(
            read("#u8(1 2 3)").unwrap(),
            Value::bytevector(vec![1, 2, 3])
        );
    }

    #[test]
    fn test_read_bytevector_empty() {
        assert_eq!(read("#u8()").unwrap(), Value::bytevector(vec![]));
    }

    #[test]
    fn test_read_bytevector_single() {
        assert_eq!(read("#u8(255)").unwrap(), Value::bytevector(vec![255]));
    }

    #[test]
    fn test_read_bytevector_out_of_range() {
        assert!(read("#u8(256)").is_err());
    }

    #[test]
    fn test_read_bytevector_negative() {
        assert!(read("#u8(-1)").is_err());
    }

    #[test]
    fn test_read_bytevector_non_integer() {
        assert!(read("#u8(1.5)").is_err());
    }

    #[test]
    fn test_read_bytevector_unterminated() {
        assert!(read("#u8(1 2").is_err());
    }

    #[test]
    fn test_read_bytevector_in_list() {
        let result = read("(#u8(1 2) #u8(3))").unwrap();
        assert_eq!(
            result,
            Value::list(vec![
                Value::bytevector(vec![1, 2]),
                Value::bytevector(vec![3]),
            ])
        );
    }

    // ---- `\x…;` hex escapes in strings ----

    #[test]
    fn test_read_string_hex_escape_basic() {
        let result = read(r#""\x41;""#).unwrap();
        assert_eq!(result, Value::string("A"));
    }

    #[test]
    fn test_read_string_hex_escape_lowercase() {
        let result = read(r#""\x6c;""#).unwrap();
        assert_eq!(result, Value::string("l"));
    }

    #[test]
    fn test_read_string_hex_escape_mixed_case() {
        let result = read(r#""\x4F;""#).unwrap();
        assert_eq!(result, Value::string("O"));
    }

    #[test]
    fn test_read_string_hex_escape_esc_char() {
        let result = read(r#""\x1B;""#).unwrap();
        assert_eq!(result, Value::string("\x1B"));
    }

    #[test]
    fn test_read_string_hex_escape_null() {
        let result = read(r#""\x0;""#).unwrap();
        assert_eq!(result, Value::string("\0"));
    }

    #[test]
    fn test_read_string_hex_escape_unicode() {
        let result = read(r#""\x3BB;""#).unwrap();
        assert_eq!(result, Value::string("λ"));
    }

    #[test]
    fn test_read_string_hex_escape_emoji() {
        let result = read(r#""\x1F600;""#).unwrap();
        assert_eq!(result, Value::string("😀"));
    }

    #[test]
    fn test_read_string_hex_escape_in_context() {
        let result = read(r#""hello\x20;world""#).unwrap();
        assert_eq!(result, Value::string("hello world"));
    }

    #[test]
    fn test_read_string_hex_escape_multiple() {
        let result = read(r#""\x48;\x69;""#).unwrap();
        assert_eq!(result, Value::string("Hi"));
    }

    #[test]
    fn test_read_string_hex_escape_missing_semicolon() {
        assert!(read(r#""\x41""#).is_err());
    }

    #[test]
    fn test_read_string_hex_escape_no_digits() {
        assert!(read(r#""\x;""#).is_err());
    }

    #[test]
    fn test_read_string_hex_escape_invalid_hex() {
        assert!(read(r#""\xGG;""#).is_err());
    }

    #[test]
    fn test_read_string_hex_escape_invalid_codepoint() {
        // U+D800 is a surrogate and not a valid `char`.
        assert!(read(r#""\xD800;""#).is_err());
    }

    #[test]
    fn test_read_string_hex_escape_too_large() {
        // 0x110000 is one past the largest Unicode code point.
        assert!(read(r#""\x110000;""#).is_err());
    }

    // ---- `\uXXXX` and `\UXXXXXXXX` escapes in strings ----

    #[test]
    fn test_read_string_u_escape_basic() {
        let result = read(r#""\u0041""#).unwrap();
        assert_eq!(result, Value::string("A"));
    }

    #[test]
    fn test_read_string_u_escape_lambda() {
        let result = read(r#""\u03BB""#).unwrap();
        assert_eq!(result, Value::string("λ"));
    }

    #[test]
    fn test_read_string_u_escape_esc() {
        let result = read(r#""\u001B""#).unwrap();
        assert_eq!(result, Value::string("\x1B"));
    }

    #[test]
    fn test_read_string_u_escape_too_few_digits() {
        assert!(read(r#""\u041""#).is_err());
    }

    #[test]
    fn test_read_string_u_escape_surrogate() {
        assert!(read(r#""\uD800""#).is_err());
    }

    #[test]
    fn test_read_string_big_u_escape_basic() {
        let result = read(r#""\U00000041""#).unwrap();
        assert_eq!(result, Value::string("A"));
    }

    #[test]
    fn test_read_string_big_u_escape_emoji() {
        let result = read(r#""\U0001F600""#).unwrap();
        assert_eq!(result, Value::string("😀"));
    }

    #[test]
    fn test_read_string_big_u_escape_too_few_digits() {
        assert!(read(r#""\U0041""#).is_err());
    }

    #[test]
    fn test_read_string_big_u_escape_invalid() {
        assert!(read(r#""\U00110000""#).is_err());
    }

    // ---- Other escapes ----

    #[test]
    fn test_read_string_null_escape() {
        let result = read(r#""\0""#).unwrap();
        assert_eq!(result, Value::string("\0"));
    }

    #[test]
    fn test_read_string_mixed_escapes() {
        let result = read(r#""\x48;\u0069\n\t""#).unwrap();
        assert_eq!(result, Value::string("Hi\n\t"));
    }

    #[test]
    fn test_read_string_ansi_escape_sequence() {
        let result = read(r#""\x1B;[31mRed\x1B;[0m""#).unwrap();
        assert_eq!(result, Value::string("\x1B[31mRed\x1B[0m"));
    }
}