Skip to main content

sema_reader/
reader.rs

1use std::collections::BTreeMap;
2use std::rc::Rc;
3
4use sema_core::{SemaError, Span, SpanMap, Value};
5
6use crate::lexer::{tokenize, SpannedToken, Token};
7
8struct Parser {
9    tokens: Vec<SpannedToken>,
10    pos: usize,
11    span_map: SpanMap,
12}
13
14impl Parser {
15    fn new(tokens: Vec<SpannedToken>) -> Self {
16        Parser {
17            tokens,
18            pos: 0,
19            span_map: SpanMap::new(),
20        }
21    }
22
23    fn peek(&self) -> Option<&Token> {
24        self.tokens.get(self.pos).map(|t| &t.token)
25    }
26
27    fn span(&self) -> Span {
28        self.tokens
29            .get(self.pos)
30            .map(|t| t.span)
31            .unwrap_or(Span::point(0, 0))
32    }
33
34    fn advance(&mut self) -> Option<&SpannedToken> {
35        let tok = self.tokens.get(self.pos);
36        if tok.is_some() {
37            self.pos += 1;
38        }
39        tok
40    }
41
42    fn expect(&mut self, expected: &Token) -> Result<(), SemaError> {
43        let span = self.span();
44        match self.advance() {
45            Some(t) if &t.token == expected => Ok(()),
46            Some(t) => Err(SemaError::Reader {
47                message: format!(
48                    "expected `{}`, got `{}`",
49                    token_display(expected),
50                    token_display(&t.token)
51                ),
52                span,
53            }),
54            None => Err(SemaError::Reader {
55                message: format!("expected `{}`, got end of input", token_display(expected)),
56                span,
57            }),
58        }
59    }
60
61    fn parse_expr(&mut self) -> Result<Value, SemaError> {
62        let span = self.span();
63        match self.peek() {
64            None => Err(SemaError::Reader {
65                message: "unexpected end of input".to_string(),
66                span,
67            }),
68            Some(Token::LParen) => self.parse_list(),
69            Some(Token::LBracket) => self.parse_vector(),
70            Some(Token::LBrace) => self.parse_map(),
71            Some(Token::Quote) => {
72                self.advance();
73                let inner = self.parse_expr().map_err(|_| {
74                    SemaError::Reader {
75                        message: "quote (') requires an expression after it".to_string(),
76                        span,
77                    }
78                    .with_hint("e.g. '(1 2 3) or 'foo")
79                })?;
80                self.make_list_with_span(vec![Value::symbol("quote"), inner], span)
81            }
82            Some(Token::Quasiquote) => {
83                self.advance();
84                let inner = self.parse_expr().map_err(|_| {
85                    SemaError::Reader {
86                        message: "quasiquote (`) requires an expression after it".to_string(),
87                        span,
88                    }
89                    .with_hint("e.g. `(list ,x)")
90                })?;
91                self.make_list_with_span(vec![Value::symbol("quasiquote"), inner], span)
92            }
93            Some(Token::Unquote) => {
94                self.advance();
95                let inner = self.parse_expr().map_err(|_| {
96                    SemaError::Reader {
97                        message: "unquote (,) requires an expression after it".to_string(),
98                        span,
99                    }
100                    .with_hint("use inside quasiquote, e.g. `(list ,x)")
101                })?;
102                self.make_list_with_span(vec![Value::symbol("unquote"), inner], span)
103            }
104            Some(Token::UnquoteSplice) => {
105                self.advance();
106                let inner = self.parse_expr().map_err(|_| {
107                    SemaError::Reader {
108                        message: "unquote-splicing (,@) requires an expression after it"
109                            .to_string(),
110                        span,
111                    }
112                    .with_hint("use inside quasiquote, e.g. `(list ,@xs)")
113                })?;
114                self.make_list_with_span(vec![Value::symbol("unquote-splicing"), inner], span)
115            }
116            Some(Token::BytevectorStart) => self.parse_bytevector(),
117            Some(_) => self.parse_atom(),
118        }
119    }
120
121    fn make_list_with_span(&mut self, items: Vec<Value>, span: Span) -> Result<Value, SemaError> {
122        let rc = Rc::new(items);
123        let ptr = Rc::as_ptr(&rc) as usize;
124        self.span_map.insert(ptr, span);
125        Ok(Value::list_from_rc(rc))
126    }
127
128    /// Get the span of the previously consumed token (the one at pos-1).
129    fn prev_span(&self) -> Span {
130        if self.pos > 0 {
131            self.tokens[self.pos - 1].span
132        } else {
133            Span::point(0, 0)
134        }
135    }
136
137    fn parse_list(&mut self) -> Result<Value, SemaError> {
138        let open_span = self.span();
139        self.expect(&Token::LParen)?;
140        let mut items = Vec::new();
141        while self.peek() != Some(&Token::RParen) {
142            if self.peek().is_none() {
143                return Err(SemaError::Reader {
144                    message: "unterminated list".to_string(),
145                    span: open_span,
146                }
147                .with_hint("add a closing `)`"));
148            }
149            // Handle dotted pairs: (a . b)
150            if self.peek() == Some(&Token::Dot) {
151                self.advance(); // skip dot
152                let cdr = self.parse_expr()?;
153                self.expect(&Token::RParen)?;
154                let close = self.prev_span();
155                items.push(Value::symbol("."));
156                items.push(cdr);
157                return self.make_list_with_span(items, open_span.to(&close));
158            }
159            items.push(self.parse_expr()?);
160        }
161        self.expect(&Token::RParen)?;
162        let close = self.prev_span();
163        self.make_list_with_span(items, open_span.to(&close))
164    }
165
166    fn parse_vector(&mut self) -> Result<Value, SemaError> {
167        let open_span = self.span();
168        self.expect(&Token::LBracket)?;
169        let mut items = Vec::new();
170        while self.peek() != Some(&Token::RBracket) {
171            if self.peek().is_none() {
172                return Err(SemaError::Reader {
173                    message: "unterminated vector".to_string(),
174                    span: open_span,
175                }
176                .with_hint("add a closing `]`"));
177            }
178            items.push(self.parse_expr()?);
179        }
180        self.expect(&Token::RBracket)?;
181        let close = self.prev_span();
182        let rc = Rc::new(items);
183        let ptr = Rc::as_ptr(&rc) as usize;
184        self.span_map.insert(ptr, open_span.to(&close));
185        Ok(Value::vector_from_rc(rc))
186    }
187
188    fn parse_map(&mut self) -> Result<Value, SemaError> {
189        let open_span = self.span();
190        self.expect(&Token::LBrace)?;
191        let mut map = BTreeMap::new();
192        while self.peek() != Some(&Token::RBrace) {
193            if self.peek().is_none() {
194                return Err(SemaError::Reader {
195                    message: "unterminated map".to_string(),
196                    span: open_span,
197                }
198                .with_hint("add a closing `}`"));
199            }
200            let key = self.parse_expr()?;
201            if self.peek() == Some(&Token::RBrace) || self.peek().is_none() {
202                return Err(SemaError::Reader {
203                    message: "map literal must have even number of forms".to_string(),
204                    span: self.span(),
205                });
206            }
207            let val = self.parse_expr()?;
208            map.insert(key, val);
209        }
210        self.expect(&Token::RBrace)?;
211        Ok(Value::map(map))
212    }
213
214    fn parse_bytevector(&mut self) -> Result<Value, SemaError> {
215        let open_span = self.span();
216        self.advance(); // consume BytevectorStart token
217        let mut bytes = Vec::new();
218        while self.peek() != Some(&Token::RParen) {
219            if self.peek().is_none() {
220                return Err(SemaError::Reader {
221                    message: "unterminated bytevector".to_string(),
222                    span: open_span,
223                }
224                .with_hint("add a closing `)`"));
225            }
226            let span = self.span();
227            match self.peek() {
228                Some(Token::Int(n)) => {
229                    let n = *n;
230                    self.advance();
231                    if !(0..=255).contains(&n) {
232                        return Err(SemaError::Reader {
233                            message: format!("#u8(...): byte value {n} out of range 0..255"),
234                            span,
235                        });
236                    }
237                    bytes.push(n as u8);
238                }
239                _ => {
240                    return Err(SemaError::Reader {
241                        message: "#u8(...): expected integer byte value".to_string(),
242                        span,
243                    });
244                }
245            }
246        }
247        self.expect(&Token::RParen)?;
248        Ok(Value::bytevector(bytes))
249    }
250
251    fn parse_atom(&mut self) -> Result<Value, SemaError> {
252        let span = self.span();
253        match self.advance() {
254            Some(SpannedToken {
255                token: Token::Int(n),
256                ..
257            }) => Ok(Value::int(*n)),
258            Some(SpannedToken {
259                token: Token::Float(f),
260                ..
261            }) => Ok(Value::float(*f)),
262            Some(SpannedToken {
263                token: Token::String(s),
264                ..
265            }) => Ok(Value::string(s)),
266            Some(SpannedToken {
267                token: Token::Symbol(s),
268                ..
269            }) => {
270                if s == "nil" {
271                    Ok(Value::nil())
272                } else {
273                    Ok(Value::symbol(s))
274                }
275            }
276            Some(SpannedToken {
277                token: Token::Keyword(s),
278                ..
279            }) => Ok(Value::keyword(s)),
280            Some(SpannedToken {
281                token: Token::Bool(b),
282                ..
283            }) => Ok(Value::bool(*b)),
284            Some(SpannedToken {
285                token: Token::Char(c),
286                ..
287            }) => Ok(Value::char(*c)),
288            Some(t) => {
289                let (name, hint) = match &t.token {
290                    Token::RParen => (
291                        "unexpected closing `)`",
292                        Some("no matching opening parenthesis"),
293                    ),
294                    Token::RBracket => (
295                        "unexpected closing `]`",
296                        Some("no matching opening bracket"),
297                    ),
298                    Token::RBrace => ("unexpected closing `}`", Some("no matching opening brace")),
299                    Token::Dot => (
300                        "unexpected `.`",
301                        Some("dots are used in pair notation, e.g. (a . b)"),
302                    ),
303                    _ => ("unexpected token", None),
304                };
305                let err = SemaError::Reader {
306                    message: name.to_string(),
307                    span,
308                };
309                Err(if let Some(h) = hint {
310                    err.with_hint(h)
311                } else {
312                    err
313                })
314            }
315            None => Err(SemaError::Reader {
316                message: "unexpected end of input".to_string(),
317                span,
318            }),
319        }
320    }
321}
322
323fn token_display(tok: &Token) -> &'static str {
324    match tok {
325        Token::LParen => "(",
326        Token::RParen => ")",
327        Token::LBracket => "[",
328        Token::RBracket => "]",
329        Token::LBrace => "{",
330        Token::RBrace => "}",
331        Token::Quote => "'",
332        Token::Quasiquote => "`",
333        Token::Unquote => ",",
334        Token::UnquoteSplice => ",@",
335        Token::Dot => ".",
336        Token::BytevectorStart => "#u8(",
337        Token::Int(_) => "integer",
338        Token::Float(_) => "float",
339        Token::String(_) => "string",
340        Token::Symbol(_) => "symbol",
341        Token::Keyword(_) => "keyword",
342        Token::Bool(_) => "boolean",
343        Token::Char(_) => "character",
344    }
345}
346
347/// Read a single s-expression from a string.
348pub fn read(input: &str) -> Result<Value, SemaError> {
349    let tokens = tokenize(input)?;
350    if tokens.is_empty() {
351        return Ok(Value::nil());
352    }
353    let mut parser = Parser::new(tokens);
354    parser.parse_expr()
355}
356
357/// Read all s-expressions from a string.
358pub fn read_many(input: &str) -> Result<Vec<Value>, SemaError> {
359    let tokens = tokenize(input)?;
360    if tokens.is_empty() {
361        return Ok(Vec::new());
362    }
363    let mut parser = Parser::new(tokens);
364    let mut exprs = Vec::new();
365    while parser.peek().is_some() {
366        exprs.push(parser.parse_expr()?);
367    }
368    Ok(exprs)
369}
370
371/// Read all s-expressions and return the accumulated span map.
372pub fn read_many_with_spans(input: &str) -> Result<(Vec<Value>, SpanMap), SemaError> {
373    let tokens = tokenize(input)?;
374    if tokens.is_empty() {
375        return Ok((Vec::new(), SpanMap::new()));
376    }
377    let mut parser = Parser::new(tokens);
378    let mut exprs = Vec::new();
379    while parser.peek().is_some() {
380        exprs.push(parser.parse_expr()?);
381    }
382    Ok((exprs, parser.span_map))
383}
384
385#[cfg(test)]
386mod tests {
387    use super::*;
388
389    #[test]
390    fn test_read_int() {
391        assert_eq!(read("42").unwrap(), Value::int(42));
392    }
393
394    #[test]
395    fn test_read_negative_int() {
396        assert_eq!(read("-7").unwrap(), Value::int(-7));
397    }
398
399    #[test]
400    fn test_read_float() {
401        assert_eq!(read("3.14").unwrap(), Value::float(3.14));
402    }
403
404    #[test]
405    fn test_read_string() {
406        assert_eq!(read("\"hello\"").unwrap(), Value::string("hello"));
407    }
408
409    #[test]
410    fn test_read_symbol() {
411        assert_eq!(read("foo").unwrap(), Value::symbol("foo"));
412    }
413
414    #[test]
415    fn test_read_keyword() {
416        assert_eq!(read(":bar").unwrap(), Value::keyword("bar"));
417    }
418
419    #[test]
420    fn test_read_bool() {
421        assert_eq!(read("#t").unwrap(), Value::bool(true));
422        assert_eq!(read("#f").unwrap(), Value::bool(false));
423    }
424
425    #[test]
426    fn test_read_list() {
427        let result = read("(+ 1 2)").unwrap();
428        assert_eq!(
429            result,
430            Value::list(vec![Value::symbol("+"), Value::int(1), Value::int(2)])
431        );
432    }
433
434    #[test]
435    fn test_read_nested_list() {
436        let result = read("(* (+ 1 2) 3)").unwrap();
437        assert_eq!(
438            result,
439            Value::list(vec![
440                Value::symbol("*"),
441                Value::list(vec![Value::symbol("+"), Value::int(1), Value::int(2)]),
442                Value::int(3)
443            ])
444        );
445    }
446
447    #[test]
448    fn test_read_vector() {
449        let result = read("[1 2 3]").unwrap();
450        assert_eq!(
451            result,
452            Value::vector(vec![Value::int(1), Value::int(2), Value::int(3)])
453        );
454    }
455
456    #[test]
457    fn test_read_map() {
458        let result = read("{:a 1 :b 2}").unwrap();
459        let mut expected = BTreeMap::new();
460        expected.insert(Value::keyword("a"), Value::int(1));
461        expected.insert(Value::keyword("b"), Value::int(2));
462        assert_eq!(result, Value::map(expected));
463    }
464
465    #[test]
466    fn test_read_quote() {
467        let result = read("'foo").unwrap();
468        assert_eq!(
469            result,
470            Value::list(vec![Value::symbol("quote"), Value::symbol("foo")])
471        );
472    }
473
474    #[test]
475    fn test_read_quasiquote() {
476        let result = read("`(a ,b ,@c)").unwrap();
477        assert_eq!(
478            result,
479            Value::list(vec![
480                Value::symbol("quasiquote"),
481                Value::list(vec![
482                    Value::symbol("a"),
483                    Value::list(vec![Value::symbol("unquote"), Value::symbol("b")]),
484                    Value::list(vec![Value::symbol("unquote-splicing"), Value::symbol("c")]),
485                ])
486            ])
487        );
488    }
489
490    #[test]
491    fn test_read_nil() {
492        assert_eq!(read("nil").unwrap(), Value::nil());
493    }
494
495    #[test]
496    fn test_read_many_exprs() {
497        let results = read_many("1 2 3").unwrap();
498        assert_eq!(results, vec![Value::int(1), Value::int(2), Value::int(3)]);
499    }
500
501    #[test]
502    fn test_comments() {
503        let result = read_many("; comment\n(+ 1 2)").unwrap();
504        assert_eq!(result.len(), 1);
505    }
506
507    #[test]
508    fn test_read_zero() {
509        assert_eq!(read("0").unwrap(), Value::int(0));
510    }
511
512    #[test]
513    fn test_read_negative_zero() {
514        assert_eq!(read("-0").unwrap(), Value::int(0));
515    }
516
517    #[test]
518    fn test_read_leading_zeros() {
519        assert_eq!(read("007").unwrap(), Value::int(7));
520    }
521
522    #[test]
523    fn test_read_large_int() {
524        assert_eq!(read("9999999999999").unwrap(), Value::int(9999999999999));
525    }
526
527    #[test]
528    fn test_read_int_overflow() {
529        // i64::MAX + 1 should error, not silently wrap
530        assert!(read("9999999999999999999999").is_err());
531    }
532
533    #[test]
534    fn test_read_negative_float() {
535        assert_eq!(read("-2.5").unwrap(), Value::float(-2.5));
536    }
537
538    #[test]
539    fn test_read_float_leading_zero() {
540        assert_eq!(read("0.5").unwrap(), Value::float(0.5));
541    }
542
543    #[test]
544    fn test_read_minus_is_symbol() {
545        // Bare `-` should be a symbol (subtraction operator), not a number
546        assert_eq!(read("-").unwrap(), Value::symbol("-"));
547    }
548
549    #[test]
550    fn test_read_minus_in_list() {
551        // `(- 3)` should parse as call to `-` with arg 3
552        let result = read("(- 3)").unwrap();
553        assert_eq!(result, Value::list(vec![Value::symbol("-"), Value::int(3)]));
554    }
555
556    #[test]
557    fn test_read_negative_in_list() {
558        // `(-3)` should parse as list containing -3
559        let result = read("(-3)").unwrap();
560        assert_eq!(result, Value::list(vec![Value::int(-3)]));
561    }
562
563    #[test]
564    fn test_read_empty_string() {
565        assert_eq!(read(r#""""#).unwrap(), Value::string(""));
566    }
567
568    #[test]
569    fn test_read_string_with_escapes() {
570        assert_eq!(
571            read(r#""\n\t\r\\\"" "#).unwrap(),
572            Value::string("\n\t\r\\\"")
573        );
574    }
575
576    #[test]
577    fn test_read_string_unknown_escape() {
578        // Unknown escape sequences are preserved literally
579        assert_eq!(read(r#""\z""#).unwrap(), Value::string("\\z"));
580    }
581
582    #[test]
583    fn test_read_string_with_newline() {
584        assert_eq!(
585            read("\"line1\nline2\"").unwrap(),
586            Value::string("line1\nline2")
587        );
588    }
589
590    #[test]
591    fn test_read_unterminated_string() {
592        assert!(read("\"hello").is_err());
593    }
594
595    #[test]
596    fn test_read_string_escaped_quote_at_end() {
597        // `"test\"` — the backslash escapes the quote, string is unterminated
598        assert!(read(r#""test\""#).is_err());
599    }
600
601    #[test]
602    fn test_read_string_with_unicode() {
603        assert_eq!(read("\"héllo\"").unwrap(), Value::string("héllo"));
604        assert_eq!(read("\"日本語\"").unwrap(), Value::string("日本語"));
605        assert_eq!(read("\"🎉\"").unwrap(), Value::string("🎉"));
606    }
607
608    #[test]
609    fn test_read_string_with_parens() {
610        assert_eq!(read("\"(+ 1 2)\"").unwrap(), Value::string("(+ 1 2)"));
611    }
612
613    #[test]
614    fn test_read_operator_symbols() {
615        assert_eq!(read("+").unwrap(), Value::symbol("+"));
616        assert_eq!(read("*").unwrap(), Value::symbol("*"));
617        assert_eq!(read("/").unwrap(), Value::symbol("/"));
618        assert_eq!(read("<=").unwrap(), Value::symbol("<="));
619        assert_eq!(read(">=").unwrap(), Value::symbol(">="));
620    }
621
622    #[test]
623    fn test_read_predicate_symbols() {
624        assert_eq!(read("null?").unwrap(), Value::symbol("null?"));
625        assert_eq!(read("list?").unwrap(), Value::symbol("list?"));
626    }
627
628    #[test]
629    fn test_read_arrow_symbols() {
630        assert_eq!(
631            read("string->symbol").unwrap(),
632            Value::symbol("string->symbol")
633        );
634    }
635
636    #[test]
637    fn test_read_namespaced_symbols() {
638        assert_eq!(read("file/read").unwrap(), Value::symbol("file/read"));
639        assert_eq!(read("http/get").unwrap(), Value::symbol("http/get"));
640    }
641
642    #[test]
643    fn test_read_true_false_as_bool() {
644        assert_eq!(read("true").unwrap(), Value::bool(true));
645        assert_eq!(read("false").unwrap(), Value::bool(false));
646    }
647
648    #[test]
649    fn test_read_bare_colon_error() {
650        // `:` alone without a name should error
651        assert!(read(":").is_err());
652    }
653
654    #[test]
655    fn test_read_keyword_with_numbers() {
656        assert_eq!(read(":foo123").unwrap(), Value::keyword("foo123"));
657    }
658
659    #[test]
660    fn test_read_keyword_with_hyphens() {
661        assert_eq!(read(":max-turns").unwrap(), Value::keyword("max-turns"));
662    }
663
664    #[test]
665    fn test_read_hash_invalid() {
666        assert!(read("#x").is_err());
667        assert!(read("#").is_err());
668    }
669
670    #[test]
671    fn test_read_empty() {
672        assert_eq!(read("").unwrap(), Value::nil());
673    }
674
675    #[test]
676    fn test_read_whitespace_only() {
677        assert_eq!(read("   \n\t  ").unwrap(), Value::nil());
678    }
679
680    #[test]
681    fn test_read_many_empty() {
682        assert_eq!(read_many("").unwrap(), vec![]);
683    }
684
685    #[test]
686    fn test_read_many_whitespace_only() {
687        assert_eq!(read_many("  \n  ").unwrap(), vec![]);
688    }
689
690    #[test]
691    fn test_read_comment_only() {
692        assert_eq!(read_many("; just a comment").unwrap(), vec![]);
693    }
694
695    #[test]
696    fn test_read_empty_list() {
697        assert_eq!(read("()").unwrap(), Value::list(vec![]));
698    }
699
700    #[test]
701    fn test_read_deeply_nested() {
702        let result = read("((((42))))").unwrap();
703        assert_eq!(
704            result,
705            Value::list(vec![Value::list(vec![Value::list(vec![Value::list(
706                vec![Value::int(42)]
707            )])])])
708        );
709    }
710
711    #[test]
712    fn test_read_unterminated_list() {
713        assert!(read("(1 2").is_err());
714    }
715
716    #[test]
717    fn test_read_extra_rparen() {
718        // `read` only reads one expr, so extra `)` is just ignored (not consumed)
719        // But `read_many` should fail since `)` is not a valid expr start
720        let result = read("42").unwrap();
721        assert_eq!(result, Value::int(42));
722    }
723
724    #[test]
725    fn test_read_dotted_pair() {
726        let result = read("(a . b)").unwrap();
727        assert_eq!(
728            result,
729            Value::list(vec![
730                Value::symbol("a"),
731                Value::symbol("."),
732                Value::symbol("b")
733            ])
734        );
735    }
736
737    #[test]
738    fn test_read_empty_vector() {
739        assert_eq!(read("[]").unwrap(), Value::vector(vec![]));
740    }
741
742    #[test]
743    fn test_read_unterminated_vector() {
744        assert!(read("[1 2").is_err());
745    }
746
747    #[test]
748    fn test_read_empty_map() {
749        assert_eq!(read("{}").unwrap(), Value::map(BTreeMap::new()));
750    }
751
752    #[test]
753    fn test_read_unterminated_map() {
754        assert!(read("{:a 1").is_err());
755    }
756
757    #[test]
758    fn test_read_map_odd_elements() {
759        assert!(read("{:a 1 :b}").is_err());
760    }
761
762    #[test]
763    fn test_read_map_duplicate_keys() {
764        // Later key wins (BTreeMap insert replaces)
765        let result = read("{:a 1 :a 2}").unwrap();
766        let mut expected = BTreeMap::new();
767        expected.insert(Value::keyword("a"), Value::int(2));
768        assert_eq!(result, Value::map(expected));
769    }
770
771    #[test]
772    fn test_read_nested_quote() {
773        let result = read("''foo").unwrap();
774        assert_eq!(
775            result,
776            Value::list(vec![
777                Value::symbol("quote"),
778                Value::list(vec![Value::symbol("quote"), Value::symbol("foo")])
779            ])
780        );
781    }
782
783    #[test]
784    fn test_read_quote_list() {
785        let result = read("'(1 2 3)").unwrap();
786        assert_eq!(
787            result,
788            Value::list(vec![
789                Value::symbol("quote"),
790                Value::list(vec![Value::int(1), Value::int(2), Value::int(3)])
791            ])
792        );
793    }
794
795    #[test]
796    fn test_read_quote_at_eof() {
797        assert!(read("'").is_err());
798    }
799
800    #[test]
801    fn test_read_unquote_at_eof() {
802        assert!(read(",").is_err());
803    }
804
805    #[test]
806    fn test_read_unquote_splice_at_eof() {
807        assert!(read(",@").is_err());
808    }
809
810    #[test]
811    fn test_read_quasiquote_at_eof() {
812        assert!(read("`").is_err());
813    }
814
815    #[test]
816    fn test_read_comment_after_expr() {
817        assert_eq!(read_many("42 ; comment").unwrap(), vec![Value::int(42)]);
818    }
819
820    #[test]
821    fn test_read_multiple_comments() {
822        let result = read_many("; first\n; second\n42").unwrap();
823        assert_eq!(result, vec![Value::int(42)]);
824    }
825
826    #[test]
827    fn test_read_comment_no_newline() {
828        // Comment at end of input without trailing newline
829        assert_eq!(read_many("; comment").unwrap(), vec![]);
830    }
831
832    #[test]
833    fn test_read_crlf_line_endings() {
834        let result = read_many("1\r\n2\r\n3").unwrap();
835        assert_eq!(result, vec![Value::int(1), Value::int(2), Value::int(3)]);
836    }
837
838    #[test]
839    fn test_read_tabs_as_whitespace() {
840        assert_eq!(
841            read("(\t+\t1\t2\t)").unwrap(),
842            Value::list(vec![Value::symbol("+"), Value::int(1), Value::int(2)])
843        );
844    }
845
846    #[test]
847    fn test_read_mixed_collections() {
848        // List containing vector and map
849        let result = read("([1 2] {:a 3})").unwrap();
850        let mut map = BTreeMap::new();
851        map.insert(Value::keyword("a"), Value::int(3));
852        assert_eq!(
853            result,
854            Value::list(vec![
855                Value::vector(vec![Value::int(1), Value::int(2)]),
856                Value::map(map)
857            ])
858        );
859    }
860
861    #[test]
862    fn test_read_many_mixed_types() {
863        let result = read_many(r#"42 3.14 "hello" foo :bar #t nil"#).unwrap();
864        assert_eq!(result.len(), 7);
865        assert_eq!(result[0], Value::int(42));
866        assert_eq!(result[1], Value::float(3.14));
867        assert_eq!(result[2], Value::string("hello"));
868        assert_eq!(result[3], Value::symbol("foo"));
869        assert_eq!(result[4], Value::keyword("bar"));
870        assert_eq!(result[5], Value::bool(true));
871        assert_eq!(result[6], Value::nil());
872    }
873
874    #[test]
875    fn test_span_map_tracks_lists() {
876        let (exprs, spans) = read_many_with_spans("(+ 1 2)").unwrap();
877        assert_eq!(exprs.len(), 1);
878        // The list should have a span entry
879        let rc = exprs[0].as_list_rc().expect("expected list");
880        let ptr = Rc::as_ptr(&rc) as usize;
881        let span = spans.get(&ptr).expect("list should have span");
882        assert_eq!(span.line, 1);
883        assert_eq!(span.col, 1);
884    }
885
886    #[test]
887    fn test_span_map_multiline() {
888        let (exprs, spans) = read_many_with_spans("(foo)\n(bar)").unwrap();
889        assert_eq!(exprs.len(), 2);
890        let rc = exprs[1].as_list_rc().expect("expected list");
891        let ptr = Rc::as_ptr(&rc) as usize;
892        let span = spans.get(&ptr).expect("second list should have span");
893        assert_eq!(span.line, 2);
894        assert_eq!(span.col, 1);
895    }
896
897    #[test]
898    fn test_read_unexpected_char() {
899        assert!(read("@").is_err());
900        assert!(read("$").is_err());
901    }
902
903    #[test]
904    fn test_read_char_literal() {
905        assert_eq!(read("#\\a").unwrap(), Value::char('a'));
906        assert_eq!(read("#\\Z").unwrap(), Value::char('Z'));
907        assert_eq!(read("#\\0").unwrap(), Value::char('0'));
908    }
909
910    #[test]
911    fn test_read_char_named() {
912        assert_eq!(read("#\\space").unwrap(), Value::char(' '));
913        assert_eq!(read("#\\newline").unwrap(), Value::char('\n'));
914        assert_eq!(read("#\\tab").unwrap(), Value::char('\t'));
915        assert_eq!(read("#\\return").unwrap(), Value::char('\r'));
916        assert_eq!(read("#\\nul").unwrap(), Value::char('\0'));
917    }
918
919    #[test]
920    fn test_read_char_special() {
921        assert_eq!(read("#\\(").unwrap(), Value::char('('));
922        assert_eq!(read("#\\)").unwrap(), Value::char(')'));
923    }
924
925    #[test]
926    fn test_read_char_in_list() {
927        let result = read("(#\\a #\\b)").unwrap();
928        assert_eq!(
929            result,
930            Value::list(vec![Value::char('a'), Value::char('b')])
931        );
932    }
933
934    #[test]
935    fn test_read_char_unknown_name() {
936        assert!(read("#\\foobar").is_err());
937    }
938
939    #[test]
940    fn test_read_char_eof() {
941        assert!(read("#\\").is_err());
942    }
943
944    #[test]
945    fn test_read_bytevector_literal() {
946        assert_eq!(
947            read("#u8(1 2 3)").unwrap(),
948            Value::bytevector(vec![1, 2, 3])
949        );
950    }
951
952    #[test]
953    fn test_read_bytevector_empty() {
954        assert_eq!(read("#u8()").unwrap(), Value::bytevector(vec![]));
955    }
956
957    #[test]
958    fn test_read_bytevector_single() {
959        assert_eq!(read("#u8(255)").unwrap(), Value::bytevector(vec![255]));
960    }
961
962    #[test]
963    fn test_read_bytevector_out_of_range() {
964        assert!(read("#u8(256)").is_err());
965    }
966
967    #[test]
968    fn test_read_bytevector_negative() {
969        assert!(read("#u8(-1)").is_err());
970    }
971
972    #[test]
973    fn test_read_bytevector_non_integer() {
974        assert!(read("#u8(1.5)").is_err());
975    }
976
977    #[test]
978    fn test_read_bytevector_unterminated() {
979        assert!(read("#u8(1 2").is_err());
980    }
981
982    #[test]
983    fn test_read_bytevector_in_list() {
984        let result = read("(#u8(1 2) #u8(3))").unwrap();
985        assert_eq!(
986            result,
987            Value::list(vec![
988                Value::bytevector(vec![1, 2]),
989                Value::bytevector(vec![3]),
990            ])
991        );
992    }
993
994    #[test]
995    fn test_read_string_hex_escape_basic() {
996        // \x41; is 'A'
997        let result = read(r#""\x41;""#).unwrap();
998        assert_eq!(result, Value::string("A"));
999    }
1000
1001    #[test]
1002    fn test_read_string_hex_escape_lowercase() {
1003        let result = read(r#""\x6c;""#).unwrap();
1004        assert_eq!(result, Value::string("l"));
1005    }
1006
1007    #[test]
1008    fn test_read_string_hex_escape_mixed_case() {
1009        let result = read(r#""\x4F;""#).unwrap();
1010        assert_eq!(result, Value::string("O"));
1011    }
1012
1013    #[test]
1014    fn test_read_string_hex_escape_esc_char() {
1015        // \x1B; is ESC (0x1b) — the main motivating use case
1016        let result = read(r#""\x1B;""#).unwrap();
1017        assert_eq!(result, Value::string("\x1B"));
1018    }
1019
1020    #[test]
1021    fn test_read_string_hex_escape_null() {
1022        let result = read(r#""\x0;""#).unwrap();
1023        assert_eq!(result, Value::string("\0"));
1024    }
1025
1026    #[test]
1027    fn test_read_string_hex_escape_unicode() {
1028        // \x3BB; is λ (Greek small letter lambda)
1029        let result = read(r#""\x3BB;""#).unwrap();
1030        assert_eq!(result, Value::string("λ"));
1031    }
1032
1033    #[test]
1034    fn test_read_string_hex_escape_emoji() {
1035        // \x1F600; is 😀
1036        let result = read(r#""\x1F600;""#).unwrap();
1037        assert_eq!(result, Value::string("😀"));
1038    }
1039
1040    #[test]
1041    fn test_read_string_hex_escape_in_context() {
1042        // Mix hex escapes with regular text and other escapes
1043        let result = read(r#""hello\x20;world""#).unwrap();
1044        assert_eq!(result, Value::string("hello world"));
1045    }
1046
1047    #[test]
1048    fn test_read_string_hex_escape_multiple() {
1049        let result = read(r#""\x48;\x69;""#).unwrap();
1050        assert_eq!(result, Value::string("Hi"));
1051    }
1052
1053    #[test]
1054    fn test_read_string_hex_escape_missing_semicolon() {
1055        assert!(read(r#""\x41""#).is_err());
1056    }
1057
1058    #[test]
1059    fn test_read_string_hex_escape_no_digits() {
1060        assert!(read(r#""\x;""#).is_err());
1061    }
1062
1063    #[test]
1064    fn test_read_string_hex_escape_invalid_hex() {
1065        assert!(read(r#""\xGG;""#).is_err());
1066    }
1067
1068    #[test]
1069    fn test_read_string_hex_escape_invalid_codepoint() {
1070        // 0xD800 is a surrogate — invalid Unicode scalar
1071        assert!(read(r#""\xD800;""#).is_err());
1072    }
1073
1074    #[test]
1075    fn test_read_string_hex_escape_too_large() {
1076        // 0x110000 is above Unicode max
1077        assert!(read(r#""\x110000;""#).is_err());
1078    }
1079
1080    #[test]
1081    fn test_read_string_u_escape_basic() {
1082        // \u0041 is 'A'
1083        let result = read(r#""\u0041""#).unwrap();
1084        assert_eq!(result, Value::string("A"));
1085    }
1086
1087    #[test]
1088    fn test_read_string_u_escape_lambda() {
1089        let result = read(r#""\u03BB""#).unwrap();
1090        assert_eq!(result, Value::string("λ"));
1091    }
1092
1093    #[test]
1094    fn test_read_string_u_escape_esc() {
1095        let result = read(r#""\u001B""#).unwrap();
1096        assert_eq!(result, Value::string("\x1B"));
1097    }
1098
1099    #[test]
1100    fn test_read_string_u_escape_too_few_digits() {
1101        assert!(read(r#""\u041""#).is_err());
1102    }
1103
1104    #[test]
1105    fn test_read_string_u_escape_surrogate() {
1106        assert!(read(r#""\uD800""#).is_err());
1107    }
1108
1109    #[test]
1110    fn test_read_string_big_u_escape_basic() {
1111        let result = read(r#""\U00000041""#).unwrap();
1112        assert_eq!(result, Value::string("A"));
1113    }
1114
1115    #[test]
1116    fn test_read_string_big_u_escape_emoji() {
1117        let result = read(r#""\U0001F600""#).unwrap();
1118        assert_eq!(result, Value::string("😀"));
1119    }
1120
1121    #[test]
1122    fn test_read_string_big_u_escape_too_few_digits() {
1123        assert!(read(r#""\U0041""#).is_err());
1124    }
1125
1126    #[test]
1127    fn test_read_string_big_u_escape_invalid() {
1128        assert!(read(r#""\U00110000""#).is_err());
1129    }
1130
1131    #[test]
1132    fn test_read_string_null_escape() {
1133        let result = read(r#""\0""#).unwrap();
1134        assert_eq!(result, Value::string("\0"));
1135    }
1136
1137    #[test]
1138    fn test_read_string_mixed_escapes() {
1139        // Mix all escape types in one string
1140        let result = read(r#""\x48;\u0069\n\t""#).unwrap();
1141        assert_eq!(result, Value::string("Hi\n\t"));
1142    }
1143
1144    #[test]
1145    fn test_read_string_ansi_escape_sequence() {
1146        // Real-world: ANSI color code ESC[31m (red)
1147        let result = read(r#""\x1B;[31mRed\x1B;[0m""#).unwrap();
1148        assert_eq!(result, Value::string("\x1B[31mRed\x1B[0m"));
1149    }
1150}