// sema_reader/reader.rs

1use std::collections::BTreeMap;
2use std::rc::Rc;
3
4use sema_core::{SemaError, Span, SpanMap, Value};
5
6use crate::lexer::{tokenize, SpannedToken, Token};
7
8struct Parser {
9    tokens: Vec<SpannedToken>,
10    pos: usize,
11    span_map: SpanMap,
12}
13
14impl Parser {
15    fn new(tokens: Vec<SpannedToken>) -> Self {
16        Parser {
17            tokens,
18            pos: 0,
19            span_map: SpanMap::new(),
20        }
21    }
22
23    fn peek(&self) -> Option<&Token> {
24        self.tokens.get(self.pos).map(|t| &t.token)
25    }
26
27    fn span(&self) -> Span {
28        self.tokens
29            .get(self.pos)
30            .map(|t| t.span)
31            .unwrap_or(Span { line: 0, col: 0 })
32    }
33
34    fn advance(&mut self) -> Option<&SpannedToken> {
35        let tok = self.tokens.get(self.pos);
36        if tok.is_some() {
37            self.pos += 1;
38        }
39        tok
40    }
41
42    fn expect(&mut self, expected: &Token) -> Result<(), SemaError> {
43        let span = self.span();
44        match self.advance() {
45            Some(t) if &t.token == expected => Ok(()),
46            Some(t) => Err(SemaError::Reader {
47                message: format!(
48                    "expected `{}`, got `{}`",
49                    token_display(expected),
50                    token_display(&t.token)
51                ),
52                span,
53            }),
54            None => Err(SemaError::Reader {
55                message: format!("expected `{}`, got end of input", token_display(expected)),
56                span,
57            }),
58        }
59    }
60
61    fn parse_expr(&mut self) -> Result<Value, SemaError> {
62        let span = self.span();
63        match self.peek() {
64            None => Err(SemaError::Reader {
65                message: "unexpected end of input".to_string(),
66                span,
67            }),
68            Some(Token::LParen) => self.parse_list(),
69            Some(Token::LBracket) => self.parse_vector(),
70            Some(Token::LBrace) => self.parse_map(),
71            Some(Token::Quote) => {
72                self.advance();
73                let inner = self.parse_expr().map_err(|_| {
74                    SemaError::Reader {
75                        message: "quote (') requires an expression after it".to_string(),
76                        span,
77                    }
78                    .with_hint("e.g. '(1 2 3) or 'foo")
79                })?;
80                self.make_list_with_span(vec![Value::symbol("quote"), inner], span)
81            }
82            Some(Token::Quasiquote) => {
83                self.advance();
84                let inner = self.parse_expr().map_err(|_| {
85                    SemaError::Reader {
86                        message: "quasiquote (`) requires an expression after it".to_string(),
87                        span,
88                    }
89                    .with_hint("e.g. `(list ,x)")
90                })?;
91                self.make_list_with_span(vec![Value::symbol("quasiquote"), inner], span)
92            }
93            Some(Token::Unquote) => {
94                self.advance();
95                let inner = self.parse_expr().map_err(|_| {
96                    SemaError::Reader {
97                        message: "unquote (,) requires an expression after it".to_string(),
98                        span,
99                    }
100                    .with_hint("use inside quasiquote, e.g. `(list ,x)")
101                })?;
102                self.make_list_with_span(vec![Value::symbol("unquote"), inner], span)
103            }
104            Some(Token::UnquoteSplice) => {
105                self.advance();
106                let inner = self.parse_expr().map_err(|_| {
107                    SemaError::Reader {
108                        message: "unquote-splicing (,@) requires an expression after it"
109                            .to_string(),
110                        span,
111                    }
112                    .with_hint("use inside quasiquote, e.g. `(list ,@xs)")
113                })?;
114                self.make_list_with_span(vec![Value::symbol("unquote-splicing"), inner], span)
115            }
116            Some(Token::BytevectorStart) => self.parse_bytevector(),
117            Some(_) => self.parse_atom(),
118        }
119    }
120
121    fn make_list_with_span(&mut self, items: Vec<Value>, span: Span) -> Result<Value, SemaError> {
122        let rc = Rc::new(items);
123        let ptr = Rc::as_ptr(&rc) as usize;
124        self.span_map.insert(ptr, span);
125        Ok(Value::list_from_rc(rc))
126    }
127
128    fn parse_list(&mut self) -> Result<Value, SemaError> {
129        let open_span = self.span();
130        self.expect(&Token::LParen)?;
131        let mut items = Vec::new();
132        while self.peek() != Some(&Token::RParen) {
133            if self.peek().is_none() {
134                return Err(SemaError::Reader {
135                    message: "unterminated list".to_string(),
136                    span: open_span,
137                }
138                .with_hint("add a closing `)`"));
139            }
140            // Handle dotted pairs: (a . b)
141            if self.peek() == Some(&Token::Dot) {
142                self.advance(); // skip dot
143                let cdr = self.parse_expr()?;
144                self.expect(&Token::RParen)?;
145                items.push(Value::symbol("."));
146                items.push(cdr);
147                return self.make_list_with_span(items, open_span);
148            }
149            items.push(self.parse_expr()?);
150        }
151        self.expect(&Token::RParen)?;
152        self.make_list_with_span(items, open_span)
153    }
154
155    fn parse_vector(&mut self) -> Result<Value, SemaError> {
156        let open_span = self.span();
157        self.expect(&Token::LBracket)?;
158        let mut items = Vec::new();
159        while self.peek() != Some(&Token::RBracket) {
160            if self.peek().is_none() {
161                return Err(SemaError::Reader {
162                    message: "unterminated vector".to_string(),
163                    span: open_span,
164                }
165                .with_hint("add a closing `]`"));
166            }
167            items.push(self.parse_expr()?);
168        }
169        self.expect(&Token::RBracket)?;
170        let rc = Rc::new(items);
171        let ptr = Rc::as_ptr(&rc) as usize;
172        self.span_map.insert(ptr, open_span);
173        Ok(Value::vector_from_rc(rc))
174    }
175
176    fn parse_map(&mut self) -> Result<Value, SemaError> {
177        let open_span = self.span();
178        self.expect(&Token::LBrace)?;
179        let mut map = BTreeMap::new();
180        while self.peek() != Some(&Token::RBrace) {
181            if self.peek().is_none() {
182                return Err(SemaError::Reader {
183                    message: "unterminated map".to_string(),
184                    span: open_span,
185                }
186                .with_hint("add a closing `}`"));
187            }
188            let key = self.parse_expr()?;
189            if self.peek() == Some(&Token::RBrace) || self.peek().is_none() {
190                return Err(SemaError::Reader {
191                    message: "map literal must have even number of forms".to_string(),
192                    span: self.span(),
193                });
194            }
195            let val = self.parse_expr()?;
196            map.insert(key, val);
197        }
198        self.expect(&Token::RBrace)?;
199        Ok(Value::map(map))
200    }
201
202    fn parse_bytevector(&mut self) -> Result<Value, SemaError> {
203        let open_span = self.span();
204        self.advance(); // consume BytevectorStart token
205        let mut bytes = Vec::new();
206        while self.peek() != Some(&Token::RParen) {
207            if self.peek().is_none() {
208                return Err(SemaError::Reader {
209                    message: "unterminated bytevector".to_string(),
210                    span: open_span,
211                }
212                .with_hint("add a closing `)`"));
213            }
214            let span = self.span();
215            match self.peek() {
216                Some(Token::Int(n)) => {
217                    let n = *n;
218                    self.advance();
219                    if !(0..=255).contains(&n) {
220                        return Err(SemaError::Reader {
221                            message: format!("#u8(...): byte value {n} out of range 0..255"),
222                            span,
223                        });
224                    }
225                    bytes.push(n as u8);
226                }
227                _ => {
228                    return Err(SemaError::Reader {
229                        message: "#u8(...): expected integer byte value".to_string(),
230                        span,
231                    });
232                }
233            }
234        }
235        self.expect(&Token::RParen)?;
236        Ok(Value::bytevector(bytes))
237    }
238
239    fn parse_atom(&mut self) -> Result<Value, SemaError> {
240        let span = self.span();
241        match self.advance() {
242            Some(SpannedToken {
243                token: Token::Int(n),
244                ..
245            }) => Ok(Value::int(*n)),
246            Some(SpannedToken {
247                token: Token::Float(f),
248                ..
249            }) => Ok(Value::float(*f)),
250            Some(SpannedToken {
251                token: Token::String(s),
252                ..
253            }) => Ok(Value::string(s)),
254            Some(SpannedToken {
255                token: Token::Symbol(s),
256                ..
257            }) => {
258                if s == "nil" {
259                    Ok(Value::nil())
260                } else {
261                    Ok(Value::symbol(s))
262                }
263            }
264            Some(SpannedToken {
265                token: Token::Keyword(s),
266                ..
267            }) => Ok(Value::keyword(s)),
268            Some(SpannedToken {
269                token: Token::Bool(b),
270                ..
271            }) => Ok(Value::bool(*b)),
272            Some(SpannedToken {
273                token: Token::Char(c),
274                ..
275            }) => Ok(Value::char(*c)),
276            Some(t) => {
277                let (name, hint) = match &t.token {
278                    Token::RParen => (
279                        "unexpected closing `)`",
280                        Some("no matching opening parenthesis"),
281                    ),
282                    Token::RBracket => (
283                        "unexpected closing `]`",
284                        Some("no matching opening bracket"),
285                    ),
286                    Token::RBrace => ("unexpected closing `}`", Some("no matching opening brace")),
287                    Token::Dot => (
288                        "unexpected `.`",
289                        Some("dots are used in pair notation, e.g. (a . b)"),
290                    ),
291                    _ => ("unexpected token", None),
292                };
293                let err = SemaError::Reader {
294                    message: name.to_string(),
295                    span,
296                };
297                Err(if let Some(h) = hint {
298                    err.with_hint(h)
299                } else {
300                    err
301                })
302            }
303            None => Err(SemaError::Reader {
304                message: "unexpected end of input".to_string(),
305                span,
306            }),
307        }
308    }
309}
310
311fn token_display(tok: &Token) -> &'static str {
312    match tok {
313        Token::LParen => "(",
314        Token::RParen => ")",
315        Token::LBracket => "[",
316        Token::RBracket => "]",
317        Token::LBrace => "{",
318        Token::RBrace => "}",
319        Token::Quote => "'",
320        Token::Quasiquote => "`",
321        Token::Unquote => ",",
322        Token::UnquoteSplice => ",@",
323        Token::Dot => ".",
324        Token::BytevectorStart => "#u8(",
325        Token::Int(_) => "integer",
326        Token::Float(_) => "float",
327        Token::String(_) => "string",
328        Token::Symbol(_) => "symbol",
329        Token::Keyword(_) => "keyword",
330        Token::Bool(_) => "boolean",
331        Token::Char(_) => "character",
332    }
333}
334
335/// Read a single s-expression from a string.
336pub fn read(input: &str) -> Result<Value, SemaError> {
337    let tokens = tokenize(input)?;
338    if tokens.is_empty() {
339        return Ok(Value::nil());
340    }
341    let mut parser = Parser::new(tokens);
342    parser.parse_expr()
343}
344
345/// Read all s-expressions from a string.
346pub fn read_many(input: &str) -> Result<Vec<Value>, SemaError> {
347    let tokens = tokenize(input)?;
348    if tokens.is_empty() {
349        return Ok(Vec::new());
350    }
351    let mut parser = Parser::new(tokens);
352    let mut exprs = Vec::new();
353    while parser.peek().is_some() {
354        exprs.push(parser.parse_expr()?);
355    }
356    Ok(exprs)
357}
358
359/// Read all s-expressions and return the accumulated span map.
360pub fn read_many_with_spans(input: &str) -> Result<(Vec<Value>, SpanMap), SemaError> {
361    let tokens = tokenize(input)?;
362    if tokens.is_empty() {
363        return Ok((Vec::new(), SpanMap::new()));
364    }
365    let mut parser = Parser::new(tokens);
366    let mut exprs = Vec::new();
367    while parser.peek().is_some() {
368        exprs.push(parser.parse_expr()?);
369    }
370    Ok((exprs, parser.span_map))
371}
372
373#[cfg(test)]
374mod tests {
375    use super::*;
376
377    #[test]
378    fn test_read_int() {
379        assert_eq!(read("42").unwrap(), Value::int(42));
380    }
381
382    #[test]
383    fn test_read_negative_int() {
384        assert_eq!(read("-7").unwrap(), Value::int(-7));
385    }
386
387    #[test]
388    fn test_read_float() {
389        assert_eq!(read("3.14").unwrap(), Value::float(3.14));
390    }
391
392    #[test]
393    fn test_read_string() {
394        assert_eq!(read("\"hello\"").unwrap(), Value::string("hello"));
395    }
396
397    #[test]
398    fn test_read_symbol() {
399        assert_eq!(read("foo").unwrap(), Value::symbol("foo"));
400    }
401
402    #[test]
403    fn test_read_keyword() {
404        assert_eq!(read(":bar").unwrap(), Value::keyword("bar"));
405    }
406
407    #[test]
408    fn test_read_bool() {
409        assert_eq!(read("#t").unwrap(), Value::bool(true));
410        assert_eq!(read("#f").unwrap(), Value::bool(false));
411    }
412
413    #[test]
414    fn test_read_list() {
415        let result = read("(+ 1 2)").unwrap();
416        assert_eq!(
417            result,
418            Value::list(vec![Value::symbol("+"), Value::int(1), Value::int(2)])
419        );
420    }
421
422    #[test]
423    fn test_read_nested_list() {
424        let result = read("(* (+ 1 2) 3)").unwrap();
425        assert_eq!(
426            result,
427            Value::list(vec![
428                Value::symbol("*"),
429                Value::list(vec![Value::symbol("+"), Value::int(1), Value::int(2)]),
430                Value::int(3)
431            ])
432        );
433    }
434
435    #[test]
436    fn test_read_vector() {
437        let result = read("[1 2 3]").unwrap();
438        assert_eq!(
439            result,
440            Value::vector(vec![Value::int(1), Value::int(2), Value::int(3)])
441        );
442    }
443
444    #[test]
445    fn test_read_map() {
446        let result = read("{:a 1 :b 2}").unwrap();
447        let mut expected = BTreeMap::new();
448        expected.insert(Value::keyword("a"), Value::int(1));
449        expected.insert(Value::keyword("b"), Value::int(2));
450        assert_eq!(result, Value::map(expected));
451    }
452
453    #[test]
454    fn test_read_quote() {
455        let result = read("'foo").unwrap();
456        assert_eq!(
457            result,
458            Value::list(vec![Value::symbol("quote"), Value::symbol("foo")])
459        );
460    }
461
462    #[test]
463    fn test_read_quasiquote() {
464        let result = read("`(a ,b ,@c)").unwrap();
465        assert_eq!(
466            result,
467            Value::list(vec![
468                Value::symbol("quasiquote"),
469                Value::list(vec![
470                    Value::symbol("a"),
471                    Value::list(vec![Value::symbol("unquote"), Value::symbol("b")]),
472                    Value::list(vec![Value::symbol("unquote-splicing"), Value::symbol("c")]),
473                ])
474            ])
475        );
476    }
477
478    #[test]
479    fn test_read_nil() {
480        assert_eq!(read("nil").unwrap(), Value::nil());
481    }
482
483    #[test]
484    fn test_read_many_exprs() {
485        let results = read_many("1 2 3").unwrap();
486        assert_eq!(results, vec![Value::int(1), Value::int(2), Value::int(3)]);
487    }
488
489    #[test]
490    fn test_comments() {
491        let result = read_many("; comment\n(+ 1 2)").unwrap();
492        assert_eq!(result.len(), 1);
493    }
494
495    #[test]
496    fn test_read_zero() {
497        assert_eq!(read("0").unwrap(), Value::int(0));
498    }
499
500    #[test]
501    fn test_read_negative_zero() {
502        assert_eq!(read("-0").unwrap(), Value::int(0));
503    }
504
505    #[test]
506    fn test_read_leading_zeros() {
507        assert_eq!(read("007").unwrap(), Value::int(7));
508    }
509
510    #[test]
511    fn test_read_large_int() {
512        assert_eq!(read("9999999999999").unwrap(), Value::int(9999999999999));
513    }
514
515    #[test]
516    fn test_read_int_overflow() {
517        // i64::MAX + 1 should error, not silently wrap
518        assert!(read("9999999999999999999999").is_err());
519    }
520
521    #[test]
522    fn test_read_negative_float() {
523        assert_eq!(read("-2.5").unwrap(), Value::float(-2.5));
524    }
525
526    #[test]
527    fn test_read_float_leading_zero() {
528        assert_eq!(read("0.5").unwrap(), Value::float(0.5));
529    }
530
531    #[test]
532    fn test_read_minus_is_symbol() {
533        // Bare `-` should be a symbol (subtraction operator), not a number
534        assert_eq!(read("-").unwrap(), Value::symbol("-"));
535    }
536
537    #[test]
538    fn test_read_minus_in_list() {
539        // `(- 3)` should parse as call to `-` with arg 3
540        let result = read("(- 3)").unwrap();
541        assert_eq!(result, Value::list(vec![Value::symbol("-"), Value::int(3)]));
542    }
543
544    #[test]
545    fn test_read_negative_in_list() {
546        // `(-3)` should parse as list containing -3
547        let result = read("(-3)").unwrap();
548        assert_eq!(result, Value::list(vec![Value::int(-3)]));
549    }
550
551    #[test]
552    fn test_read_empty_string() {
553        assert_eq!(read(r#""""#).unwrap(), Value::string(""));
554    }
555
556    #[test]
557    fn test_read_string_with_escapes() {
558        assert_eq!(
559            read(r#""\n\t\r\\\"" "#).unwrap(),
560            Value::string("\n\t\r\\\"")
561        );
562    }
563
564    #[test]
565    fn test_read_string_unknown_escape() {
566        // Unknown escape sequences are preserved literally
567        assert_eq!(read(r#""\z""#).unwrap(), Value::string("\\z"));
568    }
569
570    #[test]
571    fn test_read_string_with_newline() {
572        assert_eq!(
573            read("\"line1\nline2\"").unwrap(),
574            Value::string("line1\nline2")
575        );
576    }
577
578    #[test]
579    fn test_read_unterminated_string() {
580        assert!(read("\"hello").is_err());
581    }
582
583    #[test]
584    fn test_read_string_escaped_quote_at_end() {
585        // `"test\"` — the backslash escapes the quote, string is unterminated
586        assert!(read(r#""test\""#).is_err());
587    }
588
589    #[test]
590    fn test_read_string_with_unicode() {
591        assert_eq!(read("\"héllo\"").unwrap(), Value::string("héllo"));
592        assert_eq!(read("\"日本語\"").unwrap(), Value::string("日本語"));
593        assert_eq!(read("\"🎉\"").unwrap(), Value::string("🎉"));
594    }
595
596    #[test]
597    fn test_read_string_with_parens() {
598        assert_eq!(read("\"(+ 1 2)\"").unwrap(), Value::string("(+ 1 2)"));
599    }
600
601    #[test]
602    fn test_read_operator_symbols() {
603        assert_eq!(read("+").unwrap(), Value::symbol("+"));
604        assert_eq!(read("*").unwrap(), Value::symbol("*"));
605        assert_eq!(read("/").unwrap(), Value::symbol("/"));
606        assert_eq!(read("<=").unwrap(), Value::symbol("<="));
607        assert_eq!(read(">=").unwrap(), Value::symbol(">="));
608    }
609
610    #[test]
611    fn test_read_predicate_symbols() {
612        assert_eq!(read("null?").unwrap(), Value::symbol("null?"));
613        assert_eq!(read("list?").unwrap(), Value::symbol("list?"));
614    }
615
616    #[test]
617    fn test_read_arrow_symbols() {
618        assert_eq!(
619            read("string->symbol").unwrap(),
620            Value::symbol("string->symbol")
621        );
622    }
623
624    #[test]
625    fn test_read_namespaced_symbols() {
626        assert_eq!(read("file/read").unwrap(), Value::symbol("file/read"));
627        assert_eq!(read("http/get").unwrap(), Value::symbol("http/get"));
628    }
629
630    #[test]
631    fn test_read_true_false_as_bool() {
632        assert_eq!(read("true").unwrap(), Value::bool(true));
633        assert_eq!(read("false").unwrap(), Value::bool(false));
634    }
635
636    #[test]
637    fn test_read_bare_colon_error() {
638        // `:` alone without a name should error
639        assert!(read(":").is_err());
640    }
641
642    #[test]
643    fn test_read_keyword_with_numbers() {
644        assert_eq!(read(":foo123").unwrap(), Value::keyword("foo123"));
645    }
646
647    #[test]
648    fn test_read_keyword_with_hyphens() {
649        assert_eq!(read(":max-turns").unwrap(), Value::keyword("max-turns"));
650    }
651
652    #[test]
653    fn test_read_hash_invalid() {
654        assert!(read("#x").is_err());
655        assert!(read("#").is_err());
656    }
657
658    #[test]
659    fn test_read_empty() {
660        assert_eq!(read("").unwrap(), Value::nil());
661    }
662
663    #[test]
664    fn test_read_whitespace_only() {
665        assert_eq!(read("   \n\t  ").unwrap(), Value::nil());
666    }
667
668    #[test]
669    fn test_read_many_empty() {
670        assert_eq!(read_many("").unwrap(), vec![]);
671    }
672
673    #[test]
674    fn test_read_many_whitespace_only() {
675        assert_eq!(read_many("  \n  ").unwrap(), vec![]);
676    }
677
678    #[test]
679    fn test_read_comment_only() {
680        assert_eq!(read_many("; just a comment").unwrap(), vec![]);
681    }
682
683    #[test]
684    fn test_read_empty_list() {
685        assert_eq!(read("()").unwrap(), Value::list(vec![]));
686    }
687
688    #[test]
689    fn test_read_deeply_nested() {
690        let result = read("((((42))))").unwrap();
691        assert_eq!(
692            result,
693            Value::list(vec![Value::list(vec![Value::list(vec![Value::list(
694                vec![Value::int(42)]
695            )])])])
696        );
697    }
698
699    #[test]
700    fn test_read_unterminated_list() {
701        assert!(read("(1 2").is_err());
702    }
703
704    #[test]
705    fn test_read_extra_rparen() {
706        // `read` only reads one expr, so extra `)` is just ignored (not consumed)
707        // But `read_many` should fail since `)` is not a valid expr start
708        let result = read("42").unwrap();
709        assert_eq!(result, Value::int(42));
710    }
711
712    #[test]
713    fn test_read_dotted_pair() {
714        let result = read("(a . b)").unwrap();
715        assert_eq!(
716            result,
717            Value::list(vec![
718                Value::symbol("a"),
719                Value::symbol("."),
720                Value::symbol("b")
721            ])
722        );
723    }
724
725    #[test]
726    fn test_read_empty_vector() {
727        assert_eq!(read("[]").unwrap(), Value::vector(vec![]));
728    }
729
730    #[test]
731    fn test_read_unterminated_vector() {
732        assert!(read("[1 2").is_err());
733    }
734
735    #[test]
736    fn test_read_empty_map() {
737        assert_eq!(read("{}").unwrap(), Value::map(BTreeMap::new()));
738    }
739
740    #[test]
741    fn test_read_unterminated_map() {
742        assert!(read("{:a 1").is_err());
743    }
744
745    #[test]
746    fn test_read_map_odd_elements() {
747        assert!(read("{:a 1 :b}").is_err());
748    }
749
750    #[test]
751    fn test_read_map_duplicate_keys() {
752        // Later key wins (BTreeMap insert replaces)
753        let result = read("{:a 1 :a 2}").unwrap();
754        let mut expected = BTreeMap::new();
755        expected.insert(Value::keyword("a"), Value::int(2));
756        assert_eq!(result, Value::map(expected));
757    }
758
759    #[test]
760    fn test_read_nested_quote() {
761        let result = read("''foo").unwrap();
762        assert_eq!(
763            result,
764            Value::list(vec![
765                Value::symbol("quote"),
766                Value::list(vec![Value::symbol("quote"), Value::symbol("foo")])
767            ])
768        );
769    }
770
771    #[test]
772    fn test_read_quote_list() {
773        let result = read("'(1 2 3)").unwrap();
774        assert_eq!(
775            result,
776            Value::list(vec![
777                Value::symbol("quote"),
778                Value::list(vec![Value::int(1), Value::int(2), Value::int(3)])
779            ])
780        );
781    }
782
783    #[test]
784    fn test_read_quote_at_eof() {
785        assert!(read("'").is_err());
786    }
787
788    #[test]
789    fn test_read_unquote_at_eof() {
790        assert!(read(",").is_err());
791    }
792
793    #[test]
794    fn test_read_unquote_splice_at_eof() {
795        assert!(read(",@").is_err());
796    }
797
798    #[test]
799    fn test_read_quasiquote_at_eof() {
800        assert!(read("`").is_err());
801    }
802
803    #[test]
804    fn test_read_comment_after_expr() {
805        assert_eq!(read_many("42 ; comment").unwrap(), vec![Value::int(42)]);
806    }
807
808    #[test]
809    fn test_read_multiple_comments() {
810        let result = read_many("; first\n; second\n42").unwrap();
811        assert_eq!(result, vec![Value::int(42)]);
812    }
813
814    #[test]
815    fn test_read_comment_no_newline() {
816        // Comment at end of input without trailing newline
817        assert_eq!(read_many("; comment").unwrap(), vec![]);
818    }
819
820    #[test]
821    fn test_read_crlf_line_endings() {
822        let result = read_many("1\r\n2\r\n3").unwrap();
823        assert_eq!(result, vec![Value::int(1), Value::int(2), Value::int(3)]);
824    }
825
826    #[test]
827    fn test_read_tabs_as_whitespace() {
828        assert_eq!(
829            read("(\t+\t1\t2\t)").unwrap(),
830            Value::list(vec![Value::symbol("+"), Value::int(1), Value::int(2)])
831        );
832    }
833
834    #[test]
835    fn test_read_mixed_collections() {
836        // List containing vector and map
837        let result = read("([1 2] {:a 3})").unwrap();
838        let mut map = BTreeMap::new();
839        map.insert(Value::keyword("a"), Value::int(3));
840        assert_eq!(
841            result,
842            Value::list(vec![
843                Value::vector(vec![Value::int(1), Value::int(2)]),
844                Value::map(map)
845            ])
846        );
847    }
848
849    #[test]
850    fn test_read_many_mixed_types() {
851        let result = read_many(r#"42 3.14 "hello" foo :bar #t nil"#).unwrap();
852        assert_eq!(result.len(), 7);
853        assert_eq!(result[0], Value::int(42));
854        assert_eq!(result[1], Value::float(3.14));
855        assert_eq!(result[2], Value::string("hello"));
856        assert_eq!(result[3], Value::symbol("foo"));
857        assert_eq!(result[4], Value::keyword("bar"));
858        assert_eq!(result[5], Value::bool(true));
859        assert_eq!(result[6], Value::nil());
860    }
861
862    #[test]
863    fn test_span_map_tracks_lists() {
864        let (exprs, spans) = read_many_with_spans("(+ 1 2)").unwrap();
865        assert_eq!(exprs.len(), 1);
866        // The list should have a span entry
867        let rc = exprs[0].as_list_rc().expect("expected list");
868        let ptr = Rc::as_ptr(&rc) as usize;
869        let span = spans.get(&ptr).expect("list should have span");
870        assert_eq!(span.line, 1);
871        assert_eq!(span.col, 1);
872    }
873
874    #[test]
875    fn test_span_map_multiline() {
876        let (exprs, spans) = read_many_with_spans("(foo)\n(bar)").unwrap();
877        assert_eq!(exprs.len(), 2);
878        let rc = exprs[1].as_list_rc().expect("expected list");
879        let ptr = Rc::as_ptr(&rc) as usize;
880        let span = spans.get(&ptr).expect("second list should have span");
881        assert_eq!(span.line, 2);
882        assert_eq!(span.col, 1);
883    }
884
885    #[test]
886    fn test_read_unexpected_char() {
887        assert!(read("@").is_err());
888        assert!(read("$").is_err());
889    }
890
891    #[test]
892    fn test_read_char_literal() {
893        assert_eq!(read("#\\a").unwrap(), Value::char('a'));
894        assert_eq!(read("#\\Z").unwrap(), Value::char('Z'));
895        assert_eq!(read("#\\0").unwrap(), Value::char('0'));
896    }
897
898    #[test]
899    fn test_read_char_named() {
900        assert_eq!(read("#\\space").unwrap(), Value::char(' '));
901        assert_eq!(read("#\\newline").unwrap(), Value::char('\n'));
902        assert_eq!(read("#\\tab").unwrap(), Value::char('\t'));
903        assert_eq!(read("#\\return").unwrap(), Value::char('\r'));
904        assert_eq!(read("#\\nul").unwrap(), Value::char('\0'));
905    }
906
907    #[test]
908    fn test_read_char_special() {
909        assert_eq!(read("#\\(").unwrap(), Value::char('('));
910        assert_eq!(read("#\\)").unwrap(), Value::char(')'));
911    }
912
913    #[test]
914    fn test_read_char_in_list() {
915        let result = read("(#\\a #\\b)").unwrap();
916        assert_eq!(
917            result,
918            Value::list(vec![Value::char('a'), Value::char('b')])
919        );
920    }
921
922    #[test]
923    fn test_read_char_unknown_name() {
924        assert!(read("#\\foobar").is_err());
925    }
926
927    #[test]
928    fn test_read_char_eof() {
929        assert!(read("#\\").is_err());
930    }
931
932    #[test]
933    fn test_read_bytevector_literal() {
934        assert_eq!(
935            read("#u8(1 2 3)").unwrap(),
936            Value::bytevector(vec![1, 2, 3])
937        );
938    }
939
940    #[test]
941    fn test_read_bytevector_empty() {
942        assert_eq!(read("#u8()").unwrap(), Value::bytevector(vec![]));
943    }
944
945    #[test]
946    fn test_read_bytevector_single() {
947        assert_eq!(read("#u8(255)").unwrap(), Value::bytevector(vec![255]));
948    }
949
950    #[test]
951    fn test_read_bytevector_out_of_range() {
952        assert!(read("#u8(256)").is_err());
953    }
954
955    #[test]
956    fn test_read_bytevector_negative() {
957        assert!(read("#u8(-1)").is_err());
958    }
959
960    #[test]
961    fn test_read_bytevector_non_integer() {
962        assert!(read("#u8(1.5)").is_err());
963    }
964
965    #[test]
966    fn test_read_bytevector_unterminated() {
967        assert!(read("#u8(1 2").is_err());
968    }
969
970    #[test]
971    fn test_read_bytevector_in_list() {
972        let result = read("(#u8(1 2) #u8(3))").unwrap();
973        assert_eq!(
974            result,
975            Value::list(vec![
976                Value::bytevector(vec![1, 2]),
977                Value::bytevector(vec![3]),
978            ])
979        );
980    }
981
982    #[test]
983    fn test_read_string_hex_escape_basic() {
984        // \x41; is 'A'
985        let result = read(r#""\x41;""#).unwrap();
986        assert_eq!(result, Value::string("A"));
987    }
988
989    #[test]
990    fn test_read_string_hex_escape_lowercase() {
991        let result = read(r#""\x6c;""#).unwrap();
992        assert_eq!(result, Value::string("l"));
993    }
994
995    #[test]
996    fn test_read_string_hex_escape_mixed_case() {
997        let result = read(r#""\x4F;""#).unwrap();
998        assert_eq!(result, Value::string("O"));
999    }
1000
1001    #[test]
1002    fn test_read_string_hex_escape_esc_char() {
1003        // \x1B; is ESC (0x1b) — the main motivating use case
1004        let result = read(r#""\x1B;""#).unwrap();
1005        assert_eq!(result, Value::string("\x1B"));
1006    }
1007
1008    #[test]
1009    fn test_read_string_hex_escape_null() {
1010        let result = read(r#""\x0;""#).unwrap();
1011        assert_eq!(result, Value::string("\0"));
1012    }
1013
1014    #[test]
1015    fn test_read_string_hex_escape_unicode() {
1016        // \x3BB; is λ (Greek small letter lambda)
1017        let result = read(r#""\x3BB;""#).unwrap();
1018        assert_eq!(result, Value::string("λ"));
1019    }
1020
1021    #[test]
1022    fn test_read_string_hex_escape_emoji() {
1023        // \x1F600; is 😀
1024        let result = read(r#""\x1F600;""#).unwrap();
1025        assert_eq!(result, Value::string("😀"));
1026    }
1027
1028    #[test]
1029    fn test_read_string_hex_escape_in_context() {
1030        // Mix hex escapes with regular text and other escapes
1031        let result = read(r#""hello\x20;world""#).unwrap();
1032        assert_eq!(result, Value::string("hello world"));
1033    }
1034
1035    #[test]
1036    fn test_read_string_hex_escape_multiple() {
1037        let result = read(r#""\x48;\x69;""#).unwrap();
1038        assert_eq!(result, Value::string("Hi"));
1039    }
1040
1041    #[test]
1042    fn test_read_string_hex_escape_missing_semicolon() {
1043        assert!(read(r#""\x41""#).is_err());
1044    }
1045
1046    #[test]
1047    fn test_read_string_hex_escape_no_digits() {
1048        assert!(read(r#""\x;""#).is_err());
1049    }
1050
1051    #[test]
1052    fn test_read_string_hex_escape_invalid_hex() {
1053        assert!(read(r#""\xGG;""#).is_err());
1054    }
1055
1056    #[test]
1057    fn test_read_string_hex_escape_invalid_codepoint() {
1058        // 0xD800 is a surrogate — invalid Unicode scalar
1059        assert!(read(r#""\xD800;""#).is_err());
1060    }
1061
1062    #[test]
1063    fn test_read_string_hex_escape_too_large() {
1064        // 0x110000 is above Unicode max
1065        assert!(read(r#""\x110000;""#).is_err());
1066    }
1067
1068    #[test]
1069    fn test_read_string_u_escape_basic() {
1070        // \u0041 is 'A'
1071        let result = read(r#""\u0041""#).unwrap();
1072        assert_eq!(result, Value::string("A"));
1073    }
1074
1075    #[test]
1076    fn test_read_string_u_escape_lambda() {
1077        let result = read(r#""\u03BB""#).unwrap();
1078        assert_eq!(result, Value::string("λ"));
1079    }
1080
1081    #[test]
1082    fn test_read_string_u_escape_esc() {
1083        let result = read(r#""\u001B""#).unwrap();
1084        assert_eq!(result, Value::string("\x1B"));
1085    }
1086
1087    #[test]
1088    fn test_read_string_u_escape_too_few_digits() {
1089        assert!(read(r#""\u041""#).is_err());
1090    }
1091
1092    #[test]
1093    fn test_read_string_u_escape_surrogate() {
1094        assert!(read(r#""\uD800""#).is_err());
1095    }
1096
1097    #[test]
1098    fn test_read_string_big_u_escape_basic() {
1099        let result = read(r#""\U00000041""#).unwrap();
1100        assert_eq!(result, Value::string("A"));
1101    }
1102
1103    #[test]
1104    fn test_read_string_big_u_escape_emoji() {
1105        let result = read(r#""\U0001F600""#).unwrap();
1106        assert_eq!(result, Value::string("😀"));
1107    }
1108
1109    #[test]
1110    fn test_read_string_big_u_escape_too_few_digits() {
1111        assert!(read(r#""\U0041""#).is_err());
1112    }
1113
1114    #[test]
1115    fn test_read_string_big_u_escape_invalid() {
1116        assert!(read(r#""\U00110000""#).is_err());
1117    }
1118
1119    #[test]
1120    fn test_read_string_null_escape() {
1121        let result = read(r#""\0""#).unwrap();
1122        assert_eq!(result, Value::string("\0"));
1123    }
1124
1125    #[test]
1126    fn test_read_string_mixed_escapes() {
1127        // Mix all escape types in one string
1128        let result = read(r#""\x48;\u0069\n\t""#).unwrap();
1129        assert_eq!(result, Value::string("Hi\n\t"));
1130    }
1131
1132    #[test]
1133    fn test_read_string_ansi_escape_sequence() {
1134        // Real-world: ANSI color code ESC[31m (red)
1135        let result = read(r#""\x1B;[31mRed\x1B;[0m""#).unwrap();
1136        assert_eq!(result, Value::string("\x1B[31mRed\x1B[0m"));
1137    }
1138}