Skip to main content

sema_reader/
reader.rs

1use std::collections::BTreeMap;
2use std::rc::Rc;
3
4use sema_core::{resolve, SemaError, Span, SpanMap, Value, ValueView};
5
6use crate::lexer::{tokenize, FStringPart, SpannedToken, Token};
7
/// Maximum nesting depth for parsing. Untrusted input (files, the WASM
/// playground, f-string interpolations) must not be able to overflow the thread
/// stack via thousands of nested forms. 1024 is far beyond any real program.
///
/// `Parser::parse_expr` counts its active calls and returns a reader error
/// once that count would exceed this value. F-string interpolations inherit
/// the enclosing parser's count, so they share the same budget.
const MAX_PARSE_DEPTH: usize = 1024;
12
13struct Parser {
14    tokens: Vec<SpannedToken>,
15    pos: usize,
16    span_map: SpanMap,
17    symbol_spans: Vec<(String, Span)>,
18    depth: usize,
19}
20
21impl Parser {
22    fn new(tokens: Vec<SpannedToken>) -> Self {
23        Parser {
24            tokens,
25            pos: 0,
26            span_map: SpanMap::new(),
27            symbol_spans: Vec::new(),
28            depth: 0,
29        }
30    }
31
32    fn peek(&self) -> Option<&Token> {
33        let mut pos = self.pos;
34        while let Some(t) = self.tokens.get(pos) {
35            match &t.token {
36                Token::Comment(_) | Token::Newline => pos += 1,
37                _ => return Some(&t.token),
38            }
39        }
40        None
41    }
42
43    fn span(&self) -> Span {
44        let mut pos = self.pos;
45        while let Some(t) = self.tokens.get(pos) {
46            match &t.token {
47                Token::Comment(_) | Token::Newline => pos += 1,
48                _ => return t.span,
49            }
50        }
51        Span::point(0, 0)
52    }
53
54    fn skip_trivia(&mut self) {
55        while let Some(t) = self.tokens.get(self.pos) {
56            match &t.token {
57                Token::Comment(_) | Token::Newline => self.pos += 1,
58                _ => break,
59            }
60        }
61    }
62
63    fn advance(&mut self) -> Option<&SpannedToken> {
64        self.skip_trivia();
65        let tok = self.tokens.get(self.pos);
66        if tok.is_some() {
67            self.pos += 1;
68        }
69        tok
70    }
71
72    fn expect(&mut self, expected: &Token) -> Result<(), SemaError> {
73        let span = self.span();
74        match self.advance() {
75            Some(t) if &t.token == expected => Ok(()),
76            Some(t) => Err(SemaError::Reader {
77                message: format!(
78                    "expected `{}`, got `{}`",
79                    token_display(expected),
80                    token_display(&t.token)
81                ),
82                span,
83            }),
84            None => Err(SemaError::Reader {
85                message: format!("expected `{}`, got end of input", token_display(expected)),
86                span,
87            }),
88        }
89    }
90
91    fn parse_expr(&mut self) -> Result<Value, SemaError> {
92        // Bound recursion depth on the single common entry point: every nested
93        // form (list/vector/map/short-lambda elements) recurses through here.
94        self.depth += 1;
95        if self.depth > MAX_PARSE_DEPTH {
96            self.depth -= 1;
97            return Err(SemaError::Reader {
98                message: format!("input nested too deeply (limit {MAX_PARSE_DEPTH})"),
99                span: self.span(),
100            }
101            .with_hint("reduce nesting depth"));
102        }
103        let result = self.parse_expr_inner();
104        self.depth -= 1;
105        result
106    }
107
108    fn parse_expr_inner(&mut self) -> Result<Value, SemaError> {
109        let span = self.span();
110        match self.peek() {
111            None => Err(SemaError::Reader {
112                message: "unexpected end of input".to_string(),
113                span,
114            }),
115            Some(Token::LParen) => self.parse_list(),
116            Some(Token::LBracket) => self.parse_vector(),
117            Some(Token::LBrace) => self.parse_map(),
118            Some(Token::Quote) => {
119                self.advance();
120                let inner = self.parse_expr().map_err(|_| {
121                    SemaError::Reader {
122                        message: "quote (') requires an expression after it".to_string(),
123                        span,
124                    }
125                    .with_hint("e.g. '(1 2 3) or 'foo")
126                })?;
127                self.make_list_with_span(vec![Value::symbol("quote"), inner], span)
128            }
129            Some(Token::Quasiquote) => {
130                self.advance();
131                let inner = self.parse_expr().map_err(|_| {
132                    SemaError::Reader {
133                        message: "quasiquote (`) requires an expression after it".to_string(),
134                        span,
135                    }
136                    .with_hint("e.g. `(list ,x)")
137                })?;
138                self.make_list_with_span(vec![Value::symbol("quasiquote"), inner], span)
139            }
140            Some(Token::Unquote) => {
141                self.advance();
142                let inner = self.parse_expr().map_err(|_| {
143                    SemaError::Reader {
144                        message: "unquote (,) requires an expression after it".to_string(),
145                        span,
146                    }
147                    .with_hint("use inside quasiquote, e.g. `(list ,x)")
148                })?;
149                self.make_list_with_span(vec![Value::symbol("unquote"), inner], span)
150            }
151            Some(Token::UnquoteSplice) => {
152                self.advance();
153                let inner = self.parse_expr().map_err(|_| {
154                    SemaError::Reader {
155                        message: "unquote-splicing (,@) requires an expression after it"
156                            .to_string(),
157                        span,
158                    }
159                    .with_hint("use inside quasiquote, e.g. `(list ,@xs)")
160                })?;
161                self.make_list_with_span(vec![Value::symbol("unquote-splicing"), inner], span)
162            }
163            Some(Token::BytevectorStart) => self.parse_bytevector(),
164            Some(Token::ShortLambdaStart) => self.parse_short_lambda(),
165            Some(_) => {
166                let val = self.parse_atom()?;
167                if let Some(name) = val.as_symbol() {
168                    self.symbol_spans.push((name, span));
169                }
170                Ok(val)
171            }
172        }
173    }
174
175    fn make_list_with_span(&mut self, items: Vec<Value>, span: Span) -> Result<Value, SemaError> {
176        let rc = Rc::new(items);
177        let ptr = Rc::as_ptr(&rc) as usize;
178        self.span_map.insert(ptr, span);
179        Ok(Value::list_from_rc(rc))
180    }
181
182    /// Get the span of the previously consumed token (the one at pos-1).
183    fn prev_span(&self) -> Span {
184        if self.pos > 0 {
185            self.tokens[self.pos - 1].span
186        } else {
187            Span::point(0, 0)
188        }
189    }
190
191    fn parse_list(&mut self) -> Result<Value, SemaError> {
192        let open_span = self.span();
193        self.expect(&Token::LParen)?;
194        let mut items = Vec::new();
195        while self.peek() != Some(&Token::RParen) {
196            if self.peek().is_none() {
197                return Err(SemaError::Reader {
198                    message: "unterminated list".to_string(),
199                    span: open_span,
200                }
201                .with_hint("add a closing `)`"));
202            }
203            if self.peek() == Some(&Token::RBracket) {
204                return Err(SemaError::Reader {
205                    message: "mismatched bracket: expected `)` to close `(`, found `]`".to_string(),
206                    span: self.span(),
207                }
208                .with_hint("this list was opened with `(` — close it with `)`"));
209            }
210            if self.peek() == Some(&Token::RBrace) {
211                return Err(SemaError::Reader {
212                    message: "mismatched bracket: expected `)` to close `(`, found `}`".to_string(),
213                    span: self.span(),
214                }
215                .with_hint("this list was opened with `(` — close it with `)`"));
216            }
217            // Handle dotted pairs: (a . b)
218            if self.peek() == Some(&Token::Dot) {
219                self.advance(); // skip dot
220                let cdr = self.parse_expr()?;
221                self.expect(&Token::RParen)?;
222                let close = self.prev_span();
223                items.push(Value::symbol("."));
224                items.push(cdr);
225                return self.make_list_with_span(items, open_span.to(&close));
226            }
227            items.push(self.parse_expr()?);
228        }
229        self.expect(&Token::RParen)?;
230        let close = self.prev_span();
231        self.make_list_with_span(items, open_span.to(&close))
232    }
233
234    fn parse_vector(&mut self) -> Result<Value, SemaError> {
235        let open_span = self.span();
236        self.expect(&Token::LBracket)?;
237        let mut items = Vec::new();
238        while self.peek() != Some(&Token::RBracket) {
239            if self.peek().is_none() {
240                return Err(SemaError::Reader {
241                    message: "unterminated vector".to_string(),
242                    span: open_span,
243                }
244                .with_hint("add a closing `]`"));
245            }
246            if self.peek() == Some(&Token::RParen) {
247                return Err(SemaError::Reader {
248                    message: "mismatched bracket: expected `]` to close `[`, found `)`".to_string(),
249                    span: self.span(),
250                }
251                .with_hint("this vector was opened with `[` — close it with `]`"));
252            }
253            if self.peek() == Some(&Token::RBrace) {
254                return Err(SemaError::Reader {
255                    message: "mismatched bracket: expected `]` to close `[`, found `}`".to_string(),
256                    span: self.span(),
257                }
258                .with_hint("this vector was opened with `[` — close it with `]`"));
259            }
260            items.push(self.parse_expr()?);
261        }
262        self.expect(&Token::RBracket)?;
263        let close = self.prev_span();
264        let rc = Rc::new(items);
265        let ptr = Rc::as_ptr(&rc) as usize;
266        self.span_map.insert(ptr, open_span.to(&close));
267        Ok(Value::vector_from_rc(rc))
268    }
269
270    fn parse_map(&mut self) -> Result<Value, SemaError> {
271        let open_span = self.span();
272        self.expect(&Token::LBrace)?;
273        let mut map = BTreeMap::new();
274        while self.peek() != Some(&Token::RBrace) {
275            if self.peek().is_none() {
276                return Err(SemaError::Reader {
277                    message: "unterminated map".to_string(),
278                    span: open_span,
279                }
280                .with_hint("add a closing `}`"));
281            }
282            if self.peek() == Some(&Token::RParen) {
283                return Err(SemaError::Reader {
284                    message: "mismatched bracket: expected `}` to close `{`, found `)`".to_string(),
285                    span: self.span(),
286                }
287                .with_hint("this map was opened with `{` — close it with `}`"));
288            }
289            if self.peek() == Some(&Token::RBracket) {
290                return Err(SemaError::Reader {
291                    message: "mismatched bracket: expected `}` to close `{`, found `]`".to_string(),
292                    span: self.span(),
293                }
294                .with_hint("this map was opened with `{` — close it with `}`"));
295            }
296            let key = self.parse_expr()?;
297            if self.peek() == Some(&Token::RBrace) || self.peek().is_none() {
298                return Err(SemaError::Reader {
299                    message: "map literal must have even number of forms".to_string(),
300                    span: self.span(),
301                });
302            }
303            let val = self.parse_expr()?;
304            map.insert(key, val);
305        }
306        self.expect(&Token::RBrace)?;
307        Ok(Value::map(map))
308    }
309
310    fn parse_bytevector(&mut self) -> Result<Value, SemaError> {
311        let open_span = self.span();
312        self.advance(); // consume BytevectorStart token
313        let mut bytes = Vec::new();
314        while self.peek() != Some(&Token::RParen) {
315            if self.peek().is_none() {
316                return Err(SemaError::Reader {
317                    message: "unterminated bytevector".to_string(),
318                    span: open_span,
319                }
320                .with_hint("add a closing `)`"));
321            }
322            let span = self.span();
323            match self.peek() {
324                Some(Token::Int(n)) => {
325                    let n = *n;
326                    self.advance();
327                    if !(0..=255).contains(&n) {
328                        return Err(SemaError::Reader {
329                            message: format!("#u8(...): byte value {n} out of range 0..255"),
330                            span,
331                        });
332                    }
333                    bytes.push(n as u8);
334                }
335                _ => {
336                    return Err(SemaError::Reader {
337                        message: "#u8(...): expected integer byte value".to_string(),
338                        span,
339                    });
340                }
341            }
342        }
343        self.expect(&Token::RParen)?;
344        Ok(Value::bytevector(bytes))
345    }
346
347    fn parse_short_lambda(&mut self) -> Result<Value, SemaError> {
348        let open_span = self.span();
349        self.advance(); // consume ShortLambdaStart
350        let mut body_items = Vec::new();
351        while self.peek() != Some(&Token::RParen) {
352            if self.peek().is_none() {
353                return Err(SemaError::Reader {
354                    message: "unterminated short lambda #(...)".to_string(),
355                    span: open_span,
356                }
357                .with_hint("add a closing `)`"));
358            }
359            body_items.push(self.parse_expr()?);
360        }
361        self.expect(&Token::RParen)?;
362
363        // Build the body as a single list form: (fn-name arg1 arg2 ...)
364        let body = Value::list(body_items);
365
366        // Scan body for % / %1 / %2 etc., rewrite % → %1
367        let mut max_arg: usize = 0;
368        let body = rewrite_percent_args(&body, &mut max_arg);
369
370        // Build parameter list
371        let params: Vec<Value> = if max_arg == 0 {
372            vec![]
373        } else {
374            (1..=max_arg)
375                .map(|n| Value::symbol(&format!("%{}", n)))
376                .collect()
377        };
378
379        Ok(Value::list(vec![
380            Value::symbol("lambda"),
381            Value::list(params),
382            body,
383        ]))
384    }
385
386    /// After a parse error, skip tokens until we reach a position that
387    /// could plausibly start a new top-level expression (depth-0 open bracket,
388    /// quote, or atom). This enables error recovery in `read_many_recover`.
389    fn recover_to_next_expr(&mut self) {
390        let mut depth: usize = 0;
391        while let Some(tok) = self.peek() {
392            match tok {
393                // Opening brackets increase depth
394                Token::LParen
395                | Token::LBracket
396                | Token::LBrace
397                | Token::ShortLambdaStart
398                | Token::BytevectorStart => {
399                    if depth == 0 {
400                        // This could start a new top-level form — stop here
401                        return;
402                    }
403                    self.advance();
404                    depth += 1;
405                }
406                // Closing brackets decrease depth
407                Token::RParen | Token::RBracket | Token::RBrace => {
408                    if depth == 0 {
409                        // Stray closer at top level — stop and let parse_expr report it
410                        return;
411                    }
412                    self.advance();
413                    depth -= 1;
414                }
415                // Quote-like prefixes at depth 0 could start a new form
416                Token::Quote | Token::Quasiquote | Token::Unquote | Token::UnquoteSplice => {
417                    if depth == 0 {
418                        return;
419                    }
420                    self.advance();
421                }
422                // Atoms at depth 0 could be a top-level expression
423                _ => {
424                    if depth == 0 {
425                        return;
426                    }
427                    self.advance();
428                }
429            }
430        }
431    }
432
433    fn parse_atom(&mut self) -> Result<Value, SemaError> {
434        let span = self.span();
435        match self.advance() {
436            Some(SpannedToken {
437                token: Token::Int(n),
438                ..
439            }) => Ok(Value::int(*n)),
440            Some(SpannedToken {
441                token: Token::Float(f),
442                ..
443            }) => Ok(Value::float(*f)),
444            Some(SpannedToken {
445                token: Token::String(s),
446                ..
447            }) => Ok(Value::string(s)),
448            Some(SpannedToken {
449                token: Token::Regex(s),
450                ..
451            }) => Ok(Value::string(s)),
452            Some(SpannedToken {
453                token: Token::Symbol(s),
454                ..
455            }) => {
456                if s == "nil" {
457                    Ok(Value::nil())
458                } else {
459                    Ok(Value::symbol(s))
460                }
461            }
462            Some(SpannedToken {
463                token: Token::Keyword(s),
464                ..
465            }) => Ok(Value::keyword(s)),
466            Some(SpannedToken {
467                token: Token::Bool(b),
468                ..
469            }) => Ok(Value::bool(*b)),
470            Some(SpannedToken {
471                token: Token::Char(c),
472                ..
473            }) => Ok(Value::char(*c)),
474            Some(SpannedToken {
475                token: Token::FString(parts),
476                ..
477            }) => {
478                let parts = parts.clone();
479                let mut items = vec![Value::symbol("str")];
480                for part in &parts {
481                    match part {
482                        FStringPart::Literal(s) => {
483                            if !s.is_empty() {
484                                items.push(Value::string(s));
485                            }
486                        }
487                        FStringPart::Expr(src) => {
488                            // Parse the interpolation as a sub-expression. Thread
489                            // the current depth through so nested f-strings can't
490                            // bypass MAX_PARSE_DEPTH by starting a fresh parser at
491                            // depth 0 (READ-1).
492                            let sub_tokens = tokenize(src)?;
493                            let mut sub = Parser::new(sub_tokens);
494                            sub.depth = self.depth;
495                            if sub.peek().is_none() {
496                                return Err(SemaError::Reader {
497                                    message: "f-string interpolation is empty".to_string(),
498                                    span,
499                                }
500                                .with_hint("put an expression inside ${...}"));
501                            }
502                            let val = sub.parse_expr()?;
503                            // An interpolation must hold exactly one expression;
504                            // silently dropping extra forms hides bugs (READ-2).
505                            if sub.peek().is_some() {
506                                return Err(SemaError::Reader {
507                                    message:
508                                        "f-string interpolation must contain exactly one expression"
509                                            .to_string(),
510                                    span,
511                                }
512                                .with_hint("wrap multiple forms, e.g. ${(do a b)}"));
513                            }
514                            items.push(val);
515                        }
516                    }
517                }
518                Ok(Value::list(items))
519            }
520            Some(t) => {
521                let (name, hint) = match &t.token {
522                    Token::RParen => (
523                        "unexpected closing `)`",
524                        Some("no matching opening parenthesis"),
525                    ),
526                    Token::RBracket => (
527                        "unexpected closing `]`",
528                        Some("no matching opening bracket"),
529                    ),
530                    Token::RBrace => ("unexpected closing `}`", Some("no matching opening brace")),
531                    Token::Dot => (
532                        "unexpected `.`",
533                        Some("dots are used in pair notation, e.g. (a . b)"),
534                    ),
535                    _ => ("unexpected token", None),
536                };
537                let err = SemaError::Reader {
538                    message: name.to_string(),
539                    span,
540                };
541                Err(if let Some(h) = hint {
542                    err.with_hint(h)
543                } else {
544                    err
545                })
546            }
547            None => Err(SemaError::Reader {
548                message: "unexpected end of input".to_string(),
549                span,
550            }),
551        }
552    }
553}
554
555fn token_display(tok: &Token) -> &'static str {
556    match tok {
557        Token::LParen => "(",
558        Token::RParen => ")",
559        Token::LBracket => "[",
560        Token::RBracket => "]",
561        Token::LBrace => "{",
562        Token::RBrace => "}",
563        Token::Quote => "'",
564        Token::Quasiquote => "`",
565        Token::Unquote => ",",
566        Token::UnquoteSplice => ",@",
567        Token::Dot => ".",
568        Token::BytevectorStart => "#u8(",
569        Token::Int(_) => "integer",
570        Token::Float(_) => "float",
571        Token::String(_) => "string",
572        Token::Symbol(_) => "symbol",
573        Token::Keyword(_) => "keyword",
574        Token::Bool(_) => "boolean",
575        Token::Char(_) => "character",
576        Token::FString(_) => "f-string",
577        Token::ShortLambdaStart => "#(",
578        Token::Comment(_) => "comment",
579        Token::Newline => "newline",
580        Token::Regex(_) => "regex",
581    }
582}
583
584/// Recursively scan a Value AST for `%`, `%1`, `%2`, etc. symbols.
585/// Rewrites bare `%` to `%1`. Tracks the highest numbered arg in `max_arg`.
586/// Skips recursion into nested `(lambda ...)` / `(fn ...)` forms.
587fn rewrite_percent_args(expr: &Value, max_arg: &mut usize) -> Value {
588    match expr.view() {
589        ValueView::Symbol(spur) => {
590            let name = resolve(spur);
591            if name == "%" {
592                *max_arg = (*max_arg).max(1);
593                Value::symbol("%1")
594            } else if let Some(rest) = name.strip_prefix('%') {
595                if let Ok(n) = rest.parse::<usize>() {
596                    if n > 0 {
597                        *max_arg = (*max_arg).max(n);
598                    }
599                }
600                expr.clone()
601            } else {
602                expr.clone()
603            }
604        }
605        ValueView::List(items) => {
606            // Skip nested (lambda ...) / (fn ...) forms — their % args are their own
607            if let Some(first) = items.first() {
608                if let ValueView::Symbol(s) = first.view() {
609                    let name = resolve(s);
610                    if name == "lambda" || name == "fn" {
611                        return expr.clone();
612                    }
613                }
614            }
615            let new_items: Vec<Value> = items
616                .iter()
617                .map(|item| rewrite_percent_args(item, max_arg))
618                .collect();
619            Value::list(new_items)
620        }
621        ValueView::Vector(items) => {
622            let new_items: Vec<Value> = items
623                .iter()
624                .map(|item| rewrite_percent_args(item, max_arg))
625                .collect();
626            Value::vector(new_items)
627        }
628        _ => expr.clone(),
629    }
630}
631
632/// Read a single s-expression from a string.
633pub fn read(input: &str) -> Result<Value, SemaError> {
634    let tokens = tokenize(input)?;
635    let mut parser = Parser::new(tokens);
636    if parser.peek().is_none() {
637        return Ok(Value::nil());
638    }
639    parser.parse_expr()
640}
641
642/// Read all s-expressions from a string.
643pub fn read_many(input: &str) -> Result<Vec<Value>, SemaError> {
644    let tokens = tokenize(input)?;
645    let mut parser = Parser::new(tokens);
646    let mut exprs = Vec::new();
647    while parser.peek().is_some() {
648        exprs.push(parser.parse_expr()?);
649    }
650    Ok(exprs)
651}
652
653/// Read all s-expressions and return the accumulated span map.
654pub fn read_many_with_spans(input: &str) -> Result<(Vec<Value>, SpanMap), SemaError> {
655    let tokens = tokenize(input)?;
656    let mut parser = Parser::new(tokens);
657    let mut exprs = Vec::new();
658    while parser.peek().is_some() {
659        exprs.push(parser.parse_expr()?);
660    }
661    Ok((exprs, parser.span_map))
662}
663
664/// Read all s-expressions and return spans for both compound expressions and individual symbols.
665/// Symbol spans enable precise go-to-definition (jumping to the name, not the whole form).
666#[allow(clippy::type_complexity)]
667pub fn read_many_with_symbol_spans(
668    input: &str,
669) -> Result<(Vec<Value>, SpanMap, Vec<(String, Span)>), SemaError> {
670    let tokens = tokenize(input)?;
671    let mut parser = Parser::new(tokens);
672    let mut exprs = Vec::new();
673    while parser.peek().is_some() {
674        exprs.push(parser.parse_expr()?);
675    }
676    Ok((exprs, parser.span_map, parser.symbol_spans))
677}
678
679/// Read all s-expressions with error recovery.
680/// On parse errors, skips to the next top-level form and continues.
681/// Returns (successfully parsed forms, span map, collected errors).
682/// Tokenizer errors are returned as a single error with no parsed forms.
683#[allow(clippy::type_complexity)]
684pub fn read_many_with_spans_recover(
685    input: &str,
686) -> (Vec<Value>, SpanMap, Vec<(String, Span)>, Vec<SemaError>) {
687    let tokens = match tokenize(input) {
688        Ok(t) => t,
689        Err(e) => return (vec![], SpanMap::new(), vec![], vec![e]),
690    };
691    let mut parser = Parser::new(tokens);
692    let mut exprs = Vec::new();
693    let mut errors = Vec::new();
694    while parser.peek().is_some() {
695        match parser.parse_expr() {
696            Ok(expr) => exprs.push(expr),
697            Err(err) => {
698                errors.push(err);
699                parser.recover_to_next_expr();
700            }
701        }
702    }
703    (exprs, parser.span_map, parser.symbol_spans, errors)
704}
705
706#[cfg(test)]
707#[allow(clippy::approx_constant)]
708mod tests {
709    use super::*;
710
711    #[test]
712    fn test_read_int() {
713        assert_eq!(read("42").unwrap(), Value::int(42));
714    }
715
716    #[test]
717    fn deeply_nested_input_errors_instead_of_overflowing() {
718        // Untrusted input with thousands of levels of nesting must return a
719        // reader error rather than recurse to a stack overflow. Run on a large
720        // stack so the result reflects the depth-limit check, not the small
721        // default test-thread stack (which would SIGSEGV either way).
722        let result = std::thread::Builder::new()
723            .stack_size(16 * 1024 * 1024)
724            .spawn(|| {
725                let depth = 3000;
726                let src = format!("{}{}", "[".repeat(depth), "]".repeat(depth));
727                read(&src).is_err()
728            })
729            .unwrap()
730            .join()
731            .expect("parser must not overflow the stack on deeply nested input");
732        assert!(
733            result,
734            "expected a depth-limit error for deeply nested input"
735        );
736    }
737
738    #[test]
739    fn test_read_negative_int() {
740        assert_eq!(read("-7").unwrap(), Value::int(-7));
741    }
742
743    #[test]
744    fn test_read_float() {
745        assert_eq!(read("3.14").unwrap(), Value::float(3.14));
746    }
747
748    #[test]
749    fn test_read_string() {
750        assert_eq!(read("\"hello\"").unwrap(), Value::string("hello"));
751    }
752
753    #[test]
754    fn test_read_symbol() {
755        assert_eq!(read("foo").unwrap(), Value::symbol("foo"));
756    }
757
758    #[test]
759    fn test_read_keyword() {
760        assert_eq!(read(":bar").unwrap(), Value::keyword("bar"));
761    }
762
763    #[test]
764    fn test_read_bool() {
765        assert_eq!(read("#t").unwrap(), Value::bool(true));
766        assert_eq!(read("#f").unwrap(), Value::bool(false));
767    }
768
769    #[test]
770    fn test_read_list() {
771        let result = read("(+ 1 2)").unwrap();
772        assert_eq!(
773            result,
774            Value::list(vec![Value::symbol("+"), Value::int(1), Value::int(2)])
775        );
776    }
777
778    #[test]
779    fn test_read_nested_list() {
780        let result = read("(* (+ 1 2) 3)").unwrap();
781        assert_eq!(
782            result,
783            Value::list(vec![
784                Value::symbol("*"),
785                Value::list(vec![Value::symbol("+"), Value::int(1), Value::int(2)]),
786                Value::int(3)
787            ])
788        );
789    }
790
791    #[test]
792    fn test_read_vector() {
793        let result = read("[1 2 3]").unwrap();
794        assert_eq!(
795            result,
796            Value::vector(vec![Value::int(1), Value::int(2), Value::int(3)])
797        );
798    }
799
800    #[test]
801    fn test_read_map() {
802        let result = read("{:a 1 :b 2}").unwrap();
803        let mut expected = BTreeMap::new();
804        expected.insert(Value::keyword("a"), Value::int(1));
805        expected.insert(Value::keyword("b"), Value::int(2));
806        assert_eq!(result, Value::map(expected));
807    }
808
809    #[test]
810    fn test_read_quote() {
811        let result = read("'foo").unwrap();
812        assert_eq!(
813            result,
814            Value::list(vec![Value::symbol("quote"), Value::symbol("foo")])
815        );
816    }
817
818    #[test]
819    fn test_read_quasiquote() {
820        let result = read("`(a ,b ,@c)").unwrap();
821        assert_eq!(
822            result,
823            Value::list(vec![
824                Value::symbol("quasiquote"),
825                Value::list(vec![
826                    Value::symbol("a"),
827                    Value::list(vec![Value::symbol("unquote"), Value::symbol("b")]),
828                    Value::list(vec![Value::symbol("unquote-splicing"), Value::symbol("c")]),
829                ])
830            ])
831        );
832    }
833
834    #[test]
835    fn test_read_nil() {
836        assert_eq!(read("nil").unwrap(), Value::nil());
837    }
838
839    #[test]
840    fn test_read_many_exprs() {
841        let results = read_many("1 2 3").unwrap();
842        assert_eq!(results, vec![Value::int(1), Value::int(2), Value::int(3)]);
843    }
844
845    #[test]
846    fn test_comments() {
847        let result = read_many("; comment\n(+ 1 2)").unwrap();
848        assert_eq!(result.len(), 1);
849    }
850
851    #[test]
852    fn test_read_zero() {
853        assert_eq!(read("0").unwrap(), Value::int(0));
854    }
855
856    #[test]
857    fn test_read_negative_zero() {
858        assert_eq!(read("-0").unwrap(), Value::int(0));
859    }
860
861    #[test]
862    fn test_read_leading_zeros() {
863        assert_eq!(read("007").unwrap(), Value::int(7));
864    }
865
866    #[test]
867    fn test_read_large_int() {
868        assert_eq!(read("9999999999999").unwrap(), Value::int(9999999999999));
869    }
870
871    #[test]
872    fn test_read_int_overflow() {
873        // i64::MAX + 1 should error, not silently wrap
874        assert!(read("9999999999999999999999").is_err());
875    }
876
877    #[test]
878    fn test_read_negative_float() {
879        assert_eq!(read("-2.5").unwrap(), Value::float(-2.5));
880    }
881
882    #[test]
883    fn test_read_float_leading_zero() {
884        assert_eq!(read("0.5").unwrap(), Value::float(0.5));
885    }
886
887    #[test]
888    fn test_read_minus_is_symbol() {
889        // Bare `-` should be a symbol (subtraction operator), not a number
890        assert_eq!(read("-").unwrap(), Value::symbol("-"));
891    }
892
893    #[test]
894    fn test_read_minus_in_list() {
895        // `(- 3)` should parse as call to `-` with arg 3
896        let result = read("(- 3)").unwrap();
897        assert_eq!(result, Value::list(vec![Value::symbol("-"), Value::int(3)]));
898    }
899
900    #[test]
901    fn test_read_negative_in_list() {
902        // `(-3)` should parse as list containing -3
903        let result = read("(-3)").unwrap();
904        assert_eq!(result, Value::list(vec![Value::int(-3)]));
905    }
906
907    #[test]
908    fn test_read_empty_string() {
909        assert_eq!(read(r#""""#).unwrap(), Value::string(""));
910    }
911
912    #[test]
913    fn test_read_string_with_escapes() {
914        assert_eq!(
915            read(r#""\n\t\r\\\"" "#).unwrap(),
916            Value::string("\n\t\r\\\"")
917        );
918    }
919
920    #[test]
921    fn test_read_string_unknown_escape() {
922        // Unknown escape sequences are preserved literally
923        assert_eq!(read(r#""\z""#).unwrap(), Value::string("\\z"));
924    }
925
926    #[test]
927    fn test_read_string_with_newline() {
928        assert_eq!(
929            read("\"line1\nline2\"").unwrap(),
930            Value::string("line1\nline2")
931        );
932    }
933
934    #[test]
935    fn test_read_unterminated_string() {
936        assert!(read("\"hello").is_err());
937    }
938
939    #[test]
940    fn test_read_string_escaped_quote_at_end() {
941        // `"test\"` — the backslash escapes the quote, string is unterminated
942        assert!(read(r#""test\""#).is_err());
943    }
944
945    #[test]
946    fn test_read_string_with_unicode() {
947        assert_eq!(read("\"héllo\"").unwrap(), Value::string("héllo"));
948        assert_eq!(read("\"日本語\"").unwrap(), Value::string("日本語"));
949        assert_eq!(read("\"🎉\"").unwrap(), Value::string("🎉"));
950    }
951
952    #[test]
953    fn test_read_string_with_parens() {
954        assert_eq!(read("\"(+ 1 2)\"").unwrap(), Value::string("(+ 1 2)"));
955    }
956
957    #[test]
958    fn test_read_operator_symbols() {
959        assert_eq!(read("+").unwrap(), Value::symbol("+"));
960        assert_eq!(read("*").unwrap(), Value::symbol("*"));
961        assert_eq!(read("/").unwrap(), Value::symbol("/"));
962        assert_eq!(read("<=").unwrap(), Value::symbol("<="));
963        assert_eq!(read(">=").unwrap(), Value::symbol(">="));
964    }
965
966    #[test]
967    fn test_read_predicate_symbols() {
968        assert_eq!(read("null?").unwrap(), Value::symbol("null?"));
969        assert_eq!(read("list?").unwrap(), Value::symbol("list?"));
970    }
971
972    #[test]
973    fn test_read_arrow_symbols() {
974        assert_eq!(
975            read("string->symbol").unwrap(),
976            Value::symbol("string->symbol")
977        );
978    }
979
980    #[test]
981    fn test_read_namespaced_symbols() {
982        assert_eq!(read("file/read").unwrap(), Value::symbol("file/read"));
983        assert_eq!(read("http/get").unwrap(), Value::symbol("http/get"));
984    }
985
986    #[test]
987    fn test_read_true_false_as_bool() {
988        assert_eq!(read("true").unwrap(), Value::bool(true));
989        assert_eq!(read("false").unwrap(), Value::bool(false));
990    }
991
992    #[test]
993    fn test_read_bare_colon_error() {
994        // `:` alone without a name should error
995        assert!(read(":").is_err());
996    }
997
998    #[test]
999    fn test_read_keyword_with_numbers() {
1000        assert_eq!(read(":foo123").unwrap(), Value::keyword("foo123"));
1001    }
1002
1003    #[test]
1004    fn test_read_keyword_with_hyphens() {
1005        assert_eq!(read(":max-turns").unwrap(), Value::keyword("max-turns"));
1006    }
1007
1008    #[test]
1009    fn test_read_hash_invalid() {
1010        assert!(read("#x").is_err());
1011        assert!(read("#").is_err());
1012    }
1013
1014    #[test]
1015    fn test_read_empty() {
1016        assert_eq!(read("").unwrap(), Value::nil());
1017    }
1018
1019    #[test]
1020    fn test_read_whitespace_only() {
1021        assert_eq!(read("   \n\t  ").unwrap(), Value::nil());
1022    }
1023
1024    #[test]
1025    fn test_read_many_empty() {
1026        assert_eq!(read_many("").unwrap(), vec![]);
1027    }
1028
1029    #[test]
1030    fn test_read_many_whitespace_only() {
1031        assert_eq!(read_many("  \n  ").unwrap(), vec![]);
1032    }
1033
1034    #[test]
1035    fn test_read_comment_only() {
1036        assert_eq!(read_many("; just a comment").unwrap(), vec![]);
1037    }
1038
1039    #[test]
1040    fn test_read_empty_list() {
1041        assert_eq!(read("()").unwrap(), Value::list(vec![]));
1042    }
1043
1044    #[test]
1045    fn test_read_deeply_nested() {
1046        let result = read("((((42))))").unwrap();
1047        assert_eq!(
1048            result,
1049            Value::list(vec![Value::list(vec![Value::list(vec![Value::list(
1050                vec![Value::int(42)]
1051            )])])])
1052        );
1053    }
1054
1055    #[test]
1056    fn test_read_unterminated_list() {
1057        assert!(read("(1 2").is_err());
1058    }
1059
    #[test]
    fn test_read_extra_rparen() {
        // `read` only reads one expr, so extra `)` is just ignored (not consumed)
        // But `read_many` should fail since `)` is not a valid expr start
        //
        // NOTE(review): the input below is plain `42` with no trailing `)`, so
        // neither claim above is exercised here; as written this only checks
        // that a lone integer reads back as itself. TODO confirm the intended
        // input (presumably `42)`) and whether a `read_many` assertion was
        // meant to accompany it.
        let result = read("42").unwrap();
        assert_eq!(result, Value::int(42));
    }
1067
1068    #[test]
1069    fn test_read_dotted_pair() {
1070        let result = read("(a . b)").unwrap();
1071        assert_eq!(
1072            result,
1073            Value::list(vec![
1074                Value::symbol("a"),
1075                Value::symbol("."),
1076                Value::symbol("b")
1077            ])
1078        );
1079    }
1080
1081    #[test]
1082    fn test_read_empty_vector() {
1083        assert_eq!(read("[]").unwrap(), Value::vector(vec![]));
1084    }
1085
1086    #[test]
1087    fn test_read_unterminated_vector() {
1088        assert!(read("[1 2").is_err());
1089    }
1090
1091    #[test]
1092    fn test_read_empty_map() {
1093        assert_eq!(read("{}").unwrap(), Value::map(BTreeMap::new()));
1094    }
1095
1096    #[test]
1097    fn test_read_unterminated_map() {
1098        assert!(read("{:a 1").is_err());
1099    }
1100
1101    #[test]
1102    fn test_read_map_odd_elements() {
1103        assert!(read("{:a 1 :b}").is_err());
1104    }
1105
1106    #[test]
1107    fn test_read_map_duplicate_keys() {
1108        // Later key wins (BTreeMap insert replaces)
1109        let result = read("{:a 1 :a 2}").unwrap();
1110        let mut expected = BTreeMap::new();
1111        expected.insert(Value::keyword("a"), Value::int(2));
1112        assert_eq!(result, Value::map(expected));
1113    }
1114
1115    #[test]
1116    fn test_read_nested_quote() {
1117        let result = read("''foo").unwrap();
1118        assert_eq!(
1119            result,
1120            Value::list(vec![
1121                Value::symbol("quote"),
1122                Value::list(vec![Value::symbol("quote"), Value::symbol("foo")])
1123            ])
1124        );
1125    }
1126
1127    #[test]
1128    fn test_read_quote_list() {
1129        let result = read("'(1 2 3)").unwrap();
1130        assert_eq!(
1131            result,
1132            Value::list(vec![
1133                Value::symbol("quote"),
1134                Value::list(vec![Value::int(1), Value::int(2), Value::int(3)])
1135            ])
1136        );
1137    }
1138
1139    #[test]
1140    fn test_read_quote_at_eof() {
1141        assert!(read("'").is_err());
1142    }
1143
1144    #[test]
1145    fn test_read_unquote_at_eof() {
1146        assert!(read(",").is_err());
1147    }
1148
1149    #[test]
1150    fn test_read_unquote_splice_at_eof() {
1151        assert!(read(",@").is_err());
1152    }
1153
1154    #[test]
1155    fn test_read_quasiquote_at_eof() {
1156        assert!(read("`").is_err());
1157    }
1158
1159    #[test]
1160    fn test_read_comment_after_expr() {
1161        assert_eq!(read_many("42 ; comment").unwrap(), vec![Value::int(42)]);
1162    }
1163
1164    #[test]
1165    fn test_read_multiple_comments() {
1166        let result = read_many("; first\n; second\n42").unwrap();
1167        assert_eq!(result, vec![Value::int(42)]);
1168    }
1169
1170    #[test]
1171    fn test_read_comment_no_newline() {
1172        // Comment at end of input without trailing newline
1173        assert_eq!(read_many("; comment").unwrap(), vec![]);
1174    }
1175
1176    #[test]
1177    fn test_read_crlf_line_endings() {
1178        let result = read_many("1\r\n2\r\n3").unwrap();
1179        assert_eq!(result, vec![Value::int(1), Value::int(2), Value::int(3)]);
1180    }
1181
1182    #[test]
1183    fn test_read_tabs_as_whitespace() {
1184        assert_eq!(
1185            read("(\t+\t1\t2\t)").unwrap(),
1186            Value::list(vec![Value::symbol("+"), Value::int(1), Value::int(2)])
1187        );
1188    }
1189
1190    #[test]
1191    fn test_read_mixed_collections() {
1192        // List containing vector and map
1193        let result = read("([1 2] {:a 3})").unwrap();
1194        let mut map = BTreeMap::new();
1195        map.insert(Value::keyword("a"), Value::int(3));
1196        assert_eq!(
1197            result,
1198            Value::list(vec![
1199                Value::vector(vec![Value::int(1), Value::int(2)]),
1200                Value::map(map)
1201            ])
1202        );
1203    }
1204
1205    #[test]
1206    fn test_read_many_mixed_types() {
1207        let result = read_many(r#"42 3.14 "hello" foo :bar #t nil"#).unwrap();
1208        assert_eq!(result.len(), 7);
1209        assert_eq!(result[0], Value::int(42));
1210        assert_eq!(result[1], Value::float(3.14));
1211        assert_eq!(result[2], Value::string("hello"));
1212        assert_eq!(result[3], Value::symbol("foo"));
1213        assert_eq!(result[4], Value::keyword("bar"));
1214        assert_eq!(result[5], Value::bool(true));
1215        assert_eq!(result[6], Value::nil());
1216    }
1217
1218    #[test]
1219    fn test_span_map_tracks_lists() {
1220        let (exprs, spans) = read_many_with_spans("(+ 1 2)").unwrap();
1221        assert_eq!(exprs.len(), 1);
1222        // The list should have a span entry
1223        let rc = exprs[0].as_list_rc().expect("expected list");
1224        let ptr = Rc::as_ptr(&rc) as usize;
1225        let span = spans.get(&ptr).expect("list should have span");
1226        assert_eq!(span.line, 1);
1227        assert_eq!(span.col, 1);
1228    }
1229
1230    #[test]
1231    fn test_span_map_multiline() {
1232        let (exprs, spans) = read_many_with_spans("(foo)\n(bar)").unwrap();
1233        assert_eq!(exprs.len(), 2);
1234        let rc = exprs[1].as_list_rc().expect("expected list");
1235        let ptr = Rc::as_ptr(&rc) as usize;
1236        let span = spans.get(&ptr).expect("second list should have span");
1237        assert_eq!(span.line, 2);
1238        assert_eq!(span.col, 1);
1239    }
1240
1241    #[test]
1242    fn test_read_unexpected_char() {
1243        assert!(read("@").is_err());
1244        assert!(read("$").is_err());
1245    }
1246
1247    #[test]
1248    fn test_read_char_literal() {
1249        assert_eq!(read("#\\a").unwrap(), Value::char('a'));
1250        assert_eq!(read("#\\Z").unwrap(), Value::char('Z'));
1251        assert_eq!(read("#\\0").unwrap(), Value::char('0'));
1252    }
1253
1254    #[test]
1255    fn test_read_char_named() {
1256        assert_eq!(read("#\\space").unwrap(), Value::char(' '));
1257        assert_eq!(read("#\\newline").unwrap(), Value::char('\n'));
1258        assert_eq!(read("#\\tab").unwrap(), Value::char('\t'));
1259        assert_eq!(read("#\\return").unwrap(), Value::char('\r'));
1260        assert_eq!(read("#\\nul").unwrap(), Value::char('\0'));
1261    }
1262
1263    #[test]
1264    fn test_read_char_special() {
1265        assert_eq!(read("#\\(").unwrap(), Value::char('('));
1266        assert_eq!(read("#\\)").unwrap(), Value::char(')'));
1267    }
1268
1269    #[test]
1270    fn test_read_char_in_list() {
1271        let result = read("(#\\a #\\b)").unwrap();
1272        assert_eq!(
1273            result,
1274            Value::list(vec![Value::char('a'), Value::char('b')])
1275        );
1276    }
1277
1278    #[test]
1279    fn test_read_char_unknown_name() {
1280        assert!(read("#\\foobar").is_err());
1281    }
1282
1283    #[test]
1284    fn test_read_char_eof() {
1285        assert!(read("#\\").is_err());
1286    }
1287
1288    #[test]
1289    fn test_read_bytevector_literal() {
1290        assert_eq!(
1291            read("#u8(1 2 3)").unwrap(),
1292            Value::bytevector(vec![1, 2, 3])
1293        );
1294    }
1295
1296    #[test]
1297    fn test_read_bytevector_empty() {
1298        assert_eq!(read("#u8()").unwrap(), Value::bytevector(vec![]));
1299    }
1300
1301    #[test]
1302    fn test_read_bytevector_single() {
1303        assert_eq!(read("#u8(255)").unwrap(), Value::bytevector(vec![255]));
1304    }
1305
1306    #[test]
1307    fn test_read_bytevector_out_of_range() {
1308        assert!(read("#u8(256)").is_err());
1309    }
1310
1311    #[test]
1312    fn test_read_bytevector_negative() {
1313        assert!(read("#u8(-1)").is_err());
1314    }
1315
1316    #[test]
1317    fn test_read_bytevector_non_integer() {
1318        assert!(read("#u8(1.5)").is_err());
1319    }
1320
1321    #[test]
1322    fn test_read_bytevector_unterminated() {
1323        assert!(read("#u8(1 2").is_err());
1324    }
1325
1326    #[test]
1327    fn test_read_bytevector_in_list() {
1328        let result = read("(#u8(1 2) #u8(3))").unwrap();
1329        assert_eq!(
1330            result,
1331            Value::list(vec![
1332                Value::bytevector(vec![1, 2]),
1333                Value::bytevector(vec![3]),
1334            ])
1335        );
1336    }
1337
1338    #[test]
1339    fn test_read_string_hex_escape_basic() {
1340        // \x41; is 'A'
1341        let result = read(r#""\x41;""#).unwrap();
1342        assert_eq!(result, Value::string("A"));
1343    }
1344
1345    #[test]
1346    fn test_read_string_hex_escape_lowercase() {
1347        let result = read(r#""\x6c;""#).unwrap();
1348        assert_eq!(result, Value::string("l"));
1349    }
1350
1351    #[test]
1352    fn test_read_string_hex_escape_mixed_case() {
1353        let result = read(r#""\x4F;""#).unwrap();
1354        assert_eq!(result, Value::string("O"));
1355    }
1356
1357    #[test]
1358    fn test_read_string_hex_escape_esc_char() {
1359        // \x1B; is ESC (0x1b) — the main motivating use case
1360        let result = read(r#""\x1B;""#).unwrap();
1361        assert_eq!(result, Value::string("\x1B"));
1362    }
1363
1364    #[test]
1365    fn test_read_string_hex_escape_null() {
1366        let result = read(r#""\x0;""#).unwrap();
1367        assert_eq!(result, Value::string("\0"));
1368    }
1369
1370    #[test]
1371    fn test_read_string_hex_escape_unicode() {
1372        // \x3BB; is λ (Greek small letter lambda)
1373        let result = read(r#""\x3BB;""#).unwrap();
1374        assert_eq!(result, Value::string("λ"));
1375    }
1376
1377    #[test]
1378    fn test_read_string_hex_escape_emoji() {
1379        // \x1F600; is 😀
1380        let result = read(r#""\x1F600;""#).unwrap();
1381        assert_eq!(result, Value::string("😀"));
1382    }
1383
1384    #[test]
1385    fn test_read_string_hex_escape_in_context() {
1386        // Mix hex escapes with regular text and other escapes
1387        let result = read(r#""hello\x20;world""#).unwrap();
1388        assert_eq!(result, Value::string("hello world"));
1389    }
1390
1391    #[test]
1392    fn test_read_string_hex_escape_multiple() {
1393        let result = read(r#""\x48;\x69;""#).unwrap();
1394        assert_eq!(result, Value::string("Hi"));
1395    }
1396
1397    #[test]
1398    fn test_read_string_hex_escape_missing_semicolon() {
1399        assert!(read(r#""\x41""#).is_err());
1400    }
1401
1402    #[test]
1403    fn test_read_string_hex_escape_no_digits() {
1404        assert!(read(r#""\x;""#).is_err());
1405    }
1406
1407    #[test]
1408    fn test_read_string_hex_escape_invalid_hex() {
1409        assert!(read(r#""\xGG;""#).is_err());
1410    }
1411
1412    #[test]
1413    fn test_read_string_hex_escape_invalid_codepoint() {
1414        // 0xD800 is a surrogate — invalid Unicode scalar
1415        assert!(read(r#""\xD800;""#).is_err());
1416    }
1417
1418    #[test]
1419    fn test_read_string_hex_escape_too_large() {
1420        // 0x110000 is above Unicode max
1421        assert!(read(r#""\x110000;""#).is_err());
1422    }
1423
1424    #[test]
1425    fn test_read_string_u_escape_basic() {
1426        // \u0041 is 'A'
1427        let result = read(r#""\u0041""#).unwrap();
1428        assert_eq!(result, Value::string("A"));
1429    }
1430
1431    #[test]
1432    fn test_read_string_u_escape_lambda() {
1433        let result = read(r#""\u03BB""#).unwrap();
1434        assert_eq!(result, Value::string("λ"));
1435    }
1436
1437    #[test]
1438    fn test_read_string_u_escape_esc() {
1439        let result = read(r#""\u001B""#).unwrap();
1440        assert_eq!(result, Value::string("\x1B"));
1441    }
1442
1443    #[test]
1444    fn test_read_string_u_escape_too_few_digits() {
1445        assert!(read(r#""\u041""#).is_err());
1446    }
1447
1448    #[test]
1449    fn test_read_string_u_escape_surrogate() {
1450        assert!(read(r#""\uD800""#).is_err());
1451    }
1452
1453    #[test]
1454    fn test_read_string_big_u_escape_basic() {
1455        let result = read(r#""\U00000041""#).unwrap();
1456        assert_eq!(result, Value::string("A"));
1457    }
1458
1459    #[test]
1460    fn test_read_string_big_u_escape_emoji() {
1461        let result = read(r#""\U0001F600""#).unwrap();
1462        assert_eq!(result, Value::string("😀"));
1463    }
1464
1465    #[test]
1466    fn test_read_string_big_u_escape_too_few_digits() {
1467        assert!(read(r#""\U0041""#).is_err());
1468    }
1469
1470    #[test]
1471    fn test_read_string_big_u_escape_invalid() {
1472        assert!(read(r#""\U00110000""#).is_err());
1473    }
1474
1475    #[test]
1476    fn test_read_string_null_escape() {
1477        let result = read(r#""\0""#).unwrap();
1478        assert_eq!(result, Value::string("\0"));
1479    }
1480
1481    #[test]
1482    fn test_read_string_mixed_escapes() {
1483        // Mix all escape types in one string
1484        let result = read(r#""\x48;\u0069\n\t""#).unwrap();
1485        assert_eq!(result, Value::string("Hi\n\t"));
1486    }
1487
1488    #[test]
1489    fn test_read_string_ansi_escape_sequence() {
1490        // Real-world: ANSI color code ESC[31m (red)
1491        let result = read(r#""\x1B;[31mRed\x1B;[0m""#).unwrap();
1492        assert_eq!(result, Value::string("\x1B[31mRed\x1B[0m"));
1493    }
1494
1495    // ── f-string tests ──
1496
1497    #[test]
1498    fn test_read_fstring_no_interpolation() {
1499        let result = read(r#"f"hello""#).unwrap();
1500        assert_eq!(
1501            result,
1502            Value::list(vec![Value::symbol("str"), Value::string("hello")])
1503        );
1504    }
1505
1506    #[test]
1507    fn test_read_fstring_single_var() {
1508        let result = read(r#"f"hello ${name}""#).unwrap();
1509        assert_eq!(
1510            result,
1511            Value::list(vec![
1512                Value::symbol("str"),
1513                Value::string("hello "),
1514                Value::symbol("name"),
1515            ])
1516        );
1517    }
1518
1519    #[test]
1520    fn test_read_fstring_multiple_vars() {
1521        let result = read(r#"f"${a} and ${b}""#).unwrap();
1522        assert_eq!(
1523            result,
1524            Value::list(vec![
1525                Value::symbol("str"),
1526                Value::symbol("a"),
1527                Value::string(" and "),
1528                Value::symbol("b"),
1529            ])
1530        );
1531    }
1532
1533    #[test]
1534    fn test_read_fstring_expression() {
1535        let result = read(r#"f"result: ${(+ 1 2)}""#).unwrap();
1536        assert_eq!(
1537            result,
1538            Value::list(vec![
1539                Value::symbol("str"),
1540                Value::string("result: "),
1541                Value::list(vec![Value::symbol("+"), Value::int(1), Value::int(2),]),
1542            ])
1543        );
1544    }
1545
1546    #[test]
1547    fn test_read_fstring_escaped_dollar() {
1548        let result = read(r#"f"costs \$5""#).unwrap();
1549        assert_eq!(
1550            result,
1551            Value::list(vec![Value::symbol("str"), Value::string("costs $5")])
1552        );
1553    }
1554
1555    #[test]
1556    fn test_read_fstring_dollar_without_brace() {
1557        let result = read(r#"f"costs $5""#).unwrap();
1558        assert_eq!(
1559            result,
1560            Value::list(vec![Value::symbol("str"), Value::string("costs $5")])
1561        );
1562    }
1563
1564    #[test]
1565    fn test_read_fstring_escape_sequences() {
1566        let result = read(r#"f"line1\nline2""#).unwrap();
1567        assert_eq!(
1568            result,
1569            Value::list(vec![Value::symbol("str"), Value::string("line1\nline2"),])
1570        );
1571    }
1572
1573    #[test]
1574    fn test_read_fstring_empty_interpolation_error() {
1575        assert!(read(r#"f"hello ${}""#).is_err());
1576    }
1577
1578    #[test]
1579    fn test_read_fstring_unterminated_interpolation_error() {
1580        assert!(read(r#"f"hello ${name""#).is_err());
1581    }
1582
1583    #[test]
1584    fn test_read_fstring_unterminated_string_error() {
1585        assert!(read(r#"f"hello"#).is_err());
1586    }
1587
1588    #[test]
1589    fn test_read_fstring_multiple_forms_error() {
1590        // READ-2: `${x y}` carries two forms — must error, not silently drop `y`.
1591        let err = read(r#"f"${x y}""#).unwrap_err();
1592        assert!(
1593            err.to_string().contains("exactly one expression"),
1594            "expected single-expression error, got: {err}"
1595        );
1596    }
1597
1598    #[test]
1599    fn test_read_fstring_respects_depth_limit() {
1600        // READ-1: f-string interpolation must not reset the depth counter to 0.
1601        // A deeply nested form inside `${...}` must still trip MAX_PARSE_DEPTH
1602        // rather than recursing freely and risking a stack overflow. Run on a
1603        // large stack so the result reflects the depth check, not the small
1604        // default test-thread stack.
1605        let result = std::thread::Builder::new()
1606            .stack_size(16 * 1024 * 1024)
1607            .spawn(|| {
1608                let depth = 3000;
1609                let inner = format!("{}{}", "[".repeat(depth), "]".repeat(depth));
1610                let src = format!("f\"${{{inner}}}\"");
1611                read(&src).is_err()
1612            })
1613            .unwrap()
1614            .join()
1615            .expect("parser must not overflow the stack on deeply nested f-string");
1616        assert!(
1617            result,
1618            "expected a depth-limit error for deeply nested f-string interpolation"
1619        );
1620    }
1621
1622    #[test]
1623    fn test_read_fstring_keyword_access() {
1624        let result = read(r#"f"name: ${(:name user)}""#).unwrap();
1625        assert_eq!(
1626            result,
1627            Value::list(vec![
1628                Value::symbol("str"),
1629                Value::string("name: "),
1630                Value::list(vec![Value::keyword("name"), Value::symbol("user")]),
1631            ])
1632        );
1633    }
1634
1635    #[test]
1636    fn test_read_fstring_in_list() {
1637        let result = read(r#"(println f"hello ${name}")"#).unwrap();
1638        assert_eq!(
1639            result,
1640            Value::list(vec![
1641                Value::symbol("println"),
1642                Value::list(vec![
1643                    Value::symbol("str"),
1644                    Value::string("hello "),
1645                    Value::symbol("name"),
1646                ]),
1647            ])
1648        );
1649    }
1650
1651    #[test]
1652    fn test_read_fstring_empty() {
1653        let result = read(r#"f"""#).unwrap();
1654        assert_eq!(result, Value::list(vec![Value::symbol("str")]));
1655    }
1656
1657    #[test]
1658    fn test_read_fstring_only_expr() {
1659        let result = read(r#"f"${x}""#).unwrap();
1660        assert_eq!(
1661            result,
1662            Value::list(vec![Value::symbol("str"), Value::symbol("x")])
1663        );
1664    }
1665
1666    #[test]
1667    fn test_read_f_symbol_still_works() {
1668        // Plain 'f' symbol (not followed by '"') should still parse as symbol
1669        let result = read("f").unwrap();
1670        assert_eq!(result, Value::symbol("f"));
1671    }
1672
1673    #[test]
1674    fn test_read_f_prefixed_symbol_still_works() {
1675        // 'foo' should still parse as a normal symbol
1676        let result = read("foo").unwrap();
1677        assert_eq!(result, Value::symbol("foo"));
1678    }
1679
1680    // ── short lambda tests ──
1681
1682    #[test]
1683    fn test_read_short_lambda_single_arg() {
1684        // #(+ % 1) → (lambda (%1) (+ %1 1))
1685        let result = read("#(+ % 1)").unwrap();
1686        assert_eq!(
1687            result,
1688            Value::list(vec![
1689                Value::symbol("lambda"),
1690                Value::list(vec![Value::symbol("%1")]),
1691                Value::list(vec![Value::symbol("+"), Value::symbol("%1"), Value::int(1),]),
1692            ])
1693        );
1694    }
1695
1696    #[test]
1697    fn test_read_short_lambda_two_args() {
1698        // #(+ %1 %2) → (lambda (%1 %2) (+ %1 %2))
1699        let result = read("#(+ %1 %2)").unwrap();
1700        assert_eq!(
1701            result,
1702            Value::list(vec![
1703                Value::symbol("lambda"),
1704                Value::list(vec![Value::symbol("%1"), Value::symbol("%2")]),
1705                Value::list(vec![
1706                    Value::symbol("+"),
1707                    Value::symbol("%1"),
1708                    Value::symbol("%2"),
1709                ]),
1710            ])
1711        );
1712    }
1713
1714    #[test]
1715    fn test_read_short_lambda_bare_percent_is_percent1() {
1716        // #(* % %) → (lambda (%1) (* %1 %1))
1717        let result = read("#(* % %)").unwrap();
1718        assert_eq!(
1719            result,
1720            Value::list(vec![
1721                Value::symbol("lambda"),
1722                Value::list(vec![Value::symbol("%1")]),
1723                Value::list(vec![
1724                    Value::symbol("*"),
1725                    Value::symbol("%1"),
1726                    Value::symbol("%1"),
1727                ]),
1728            ])
1729        );
1730    }
1731
1732    #[test]
1733    fn test_read_short_lambda_no_args() {
1734        // #(println "hello") → (lambda () (println "hello"))
1735        let result = read(r#"#(println "hello")"#).unwrap();
1736        assert_eq!(
1737            result,
1738            Value::list(vec![
1739                Value::symbol("lambda"),
1740                Value::list(vec![]),
1741                Value::list(vec![Value::symbol("println"), Value::string("hello"),]),
1742            ])
1743        );
1744    }
1745
1746    #[test]
1747    fn test_read_short_lambda_in_list() {
1748        // (map #(+ % 1) numbers)
1749        let result = read("(map #(+ % 1) numbers)").unwrap();
1750        assert_eq!(
1751            result,
1752            Value::list(vec![
1753                Value::symbol("map"),
1754                Value::list(vec![
1755                    Value::symbol("lambda"),
1756                    Value::list(vec![Value::symbol("%1")]),
1757                    Value::list(vec![Value::symbol("+"), Value::symbol("%1"), Value::int(1),]),
1758                ]),
1759                Value::symbol("numbers"),
1760            ])
1761        );
1762    }
1763
1764    #[test]
1765    fn test_read_short_lambda_unterminated() {
1766        assert!(read("#(+ % 1").is_err());
1767    }
1768
1769    #[test]
1770    fn test_read_short_lambda_nested_expr() {
1771        // #(> (string-length %) 3) → (lambda (%1) (> (string-length %1) 3))
1772        let result = read("#(> (string-length %) 3)").unwrap();
1773        assert_eq!(
1774            result,
1775            Value::list(vec![
1776                Value::symbol("lambda"),
1777                Value::list(vec![Value::symbol("%1")]),
1778                Value::list(vec![
1779                    Value::symbol(">"),
1780                    Value::list(vec![Value::symbol("string-length"), Value::symbol("%1"),]),
1781                    Value::int(3),
1782                ]),
1783            ])
1784        );
1785    }
1786
1787    #[test]
1788    fn test_read_regex_literal_digits() {
1789        let result = read(r#"#"\d+""#).unwrap();
1790        assert_eq!(result, Value::string(r"\d+"));
1791    }
1792
1793    #[test]
1794    fn test_read_regex_literal_char_class() {
1795        let result = read(r#"#"[a-z]+""#).unwrap();
1796        assert_eq!(result, Value::string("[a-z]+"));
1797    }
1798
1799    #[test]
1800    fn test_read_regex_literal_backslashes_literal() {
1801        let result = read(r#"#"hello\.world""#).unwrap();
1802        assert_eq!(result, Value::string(r"hello\.world"));
1803    }
1804
1805    #[test]
1806    fn test_read_regex_literal_escaped_quote() {
1807        let result = read(r#"#"foo\"bar""#).unwrap();
1808        assert_eq!(result, Value::string(r#"foo"bar"#));
1809    }
1810
1811    #[test]
1812    fn test_read_regex_literal_unterminated() {
1813        assert!(read(r#"#"abc"#).is_err());
1814    }
1815
1816    #[test]
1817    fn test_mismatched_paren_bracket() {
1818        let err = read("(list [1 2 3)").unwrap_err();
1819        let msg = err.to_string();
1820        assert!(
1821            msg.contains("mismatched"),
1822            "expected mismatched error, got: {msg}"
1823        );
1824    }
1825
1826    #[test]
1827    fn test_mismatched_bracket_paren() {
1828        let err = read("[1 2 3)").unwrap_err();
1829        let msg = err.to_string();
1830        assert!(
1831            msg.contains("mismatched"),
1832            "expected mismatched error, got: {msg}"
1833        );
1834    }
1835
1836    #[test]
1837    fn test_mismatched_paren_brace() {
1838        let err = read("(+ 1 2}").unwrap_err();
1839        let msg = err.to_string();
1840        assert!(
1841            msg.contains("mismatched"),
1842            "expected mismatched error, got: {msg}"
1843        );
1844    }
1845
1846    #[test]
1847    fn test_mismatched_brace_paren() {
1848        let err = read("{:a 1)").unwrap_err();
1849        let msg = err.to_string();
1850        assert!(
1851            msg.contains("mismatched"),
1852            "expected mismatched error, got: {msg}"
1853        );
1854    }
1855
1856    #[test]
1857    fn test_mismatched_brace_bracket() {
1858        let err = read("{:a 1]").unwrap_err();
1859        let msg = err.to_string();
1860        assert!(
1861            msg.contains("mismatched"),
1862            "expected mismatched error, got: {msg}"
1863        );
1864    }
1865
1866    #[test]
1867    fn test_mismatched_bracket_brace() {
1868        let err = read("[1 2}").unwrap_err();
1869        let msg = err.to_string();
1870        assert!(
1871            msg.contains("mismatched"),
1872            "expected mismatched error, got: {msg}"
1873        );
1874    }
1875
1876    #[test]
1877    fn test_correct_brackets_still_work() {
1878        assert!(read("(list [1 2 3])").is_ok());
1879        assert!(read("{:a 1}").is_ok());
1880        assert!(read("[1 [2 3] 4]").is_ok());
1881    }
1882
1883    #[test]
1884    fn test_auto_gensym_symbol_parsing() {
1885        let val = read("v#").unwrap();
1886        assert_eq!(val.as_symbol().unwrap(), "v#");
1887
1888        let val = read("tmp#").unwrap();
1889        assert_eq!(val.as_symbol().unwrap(), "tmp#");
1890
1891        let val = read("`(let ((v# 1)) v#)").unwrap();
1892        let items = val.as_list().unwrap();
1893        assert_eq!(items[0].as_symbol().unwrap(), "quasiquote");
1894    }
1895
1896    #[test]
1897    fn test_hash_reader_dispatch_still_works() {
1898        let val = read("#t").unwrap();
1899        assert_eq!(val.as_bool(), Some(true));
1900
1901        let val = read("#f").unwrap();
1902        assert_eq!(val.as_bool(), Some(false));
1903
1904        let val = read("#\\space").unwrap();
1905        assert_eq!(val.as_char(), Some(' '));
1906
1907        let val = read("#(+ % 1)").unwrap();
1908        assert!(val.as_list().is_some());
1909    }
1910
1911    #[test]
1912    fn test_auto_gensym_edge_cases() {
1913        let val = read("x##").unwrap();
1914        assert_eq!(val.as_symbol().unwrap(), "x##");
1915
1916        let val = read(":foo").unwrap();
1917        assert!(val.as_keyword().is_some());
1918    }
1919
1920    // ── Error recovery tests ─────────────────────────────────────
1921
1922    #[test]
1923    fn recover_valid_input_no_errors() {
1924        let (exprs, _, _, errors) = read_many_with_spans_recover("(+ 1 2) (- 3 4)");
1925        assert!(errors.is_empty());
1926        assert_eq!(exprs.len(), 2);
1927    }
1928
1929    #[test]
1930    fn recover_stray_closer_then_valid() {
1931        // Stray `)` then a valid form
1932        let (exprs, _, _, errors) = read_many_with_spans_recover(") (+ 1 2)");
1933        assert_eq!(errors.len(), 1);
1934        assert_eq!(exprs.len(), 1);
1935    }
1936
1937    #[test]
1938    fn recover_unclosed_then_valid() {
1939        // Unclosed list, then a valid form on the next line
1940        let (_exprs, _, _, errors) = read_many_with_spans_recover("(define x\n(+ 1 2)");
1941        // The first `(define x` consumes tokens including `(+ 1 2)` as part of
1942        // its unterminated body, then hits EOF → 1 error, the (+ 1 2) is inside it
1943        assert_eq!(errors.len(), 1);
1944        // The second form got consumed by the unterminated first form
1945        // so recovery can't salvage it — this is expected
1946    }
1947
1948    #[test]
1949    fn recover_multiple_stray_closers() {
1950        let (exprs, _, _, errors) = read_many_with_spans_recover(") ] } (define x 1)");
1951        assert_eq!(errors.len(), 3);
1952        assert_eq!(exprs.len(), 1);
1953        assert!(exprs[0].as_list().is_some());
1954    }
1955
1956    #[test]
1957    fn recover_mismatched_bracket() {
1958        // Mismatched bracket: ( closed with ]
1959        let (exprs, _, _, errors) = read_many_with_spans_recover("(define x] (+ 1 2)");
1960        assert!(!errors.is_empty());
1961        // After the mismatch error, recovery should find `(+ 1 2)`
1962        assert!(!exprs.is_empty());
1963    }
1964
1965    #[test]
1966    fn recover_empty_input() {
1967        let (exprs, _, _, errors) = read_many_with_spans_recover("");
1968        assert!(errors.is_empty());
1969        assert!(exprs.is_empty());
1970    }
1971
1972    #[test]
1973    fn recover_only_errors() {
1974        let (exprs, _, _, errors) = read_many_with_spans_recover(") )");
1975        assert_eq!(errors.len(), 2);
1976        assert!(exprs.is_empty());
1977    }
1978
1979    #[test]
1980    fn recover_valid_between_errors() {
1981        // error, valid, error
1982        let (exprs, _, _, errors) = read_many_with_spans_recover(") (+ 1 2) )");
1983        assert_eq!(errors.len(), 2);
1984        assert_eq!(exprs.len(), 1);
1985    }
1986
1987    // ── symbol span tracking ──
1988
1989    #[test]
1990    fn test_symbol_spans_basic() {
1991        let (_, _, sym_spans) = read_many_with_symbol_spans("(define x 42)").unwrap();
1992        // Should record "define" and "x" (not 42 — it's an int, not a symbol)
1993        let names: Vec<&str> = sym_spans.iter().map(|(n, _)| n.as_str()).collect();
1994        assert!(names.contains(&"define"), "missing define in {:?}", names);
1995        assert!(names.contains(&"x"), "missing x in {:?}", names);
1996        assert_eq!(names.len(), 2);
1997    }
1998
1999    #[test]
2000    fn test_symbol_spans_positions() {
2001        let (_, _, sym_spans) = read_many_with_symbol_spans("(defun foo (x) x)").unwrap();
2002        // "foo" should have a precise span
2003        let foo = sym_spans.iter().find(|(n, _)| n == "foo").unwrap();
2004        assert_eq!(foo.1.line, 1);
2005        assert_eq!(foo.1.col, 8); // 1-indexed: "(defun " = 7 chars, foo starts at col 8
2006    }
2007
2008    #[test]
2009    fn test_symbol_spans_no_synthetic() {
2010        // '(a b) desugars to (quote (a b)) — "quote" should NOT appear in symbol_spans
2011        let (_, _, sym_spans) = read_many_with_symbol_spans("'(a b)").unwrap();
2012        let names: Vec<&str> = sym_spans.iter().map(|(n, _)| n.as_str()).collect();
2013        assert!(
2014            !names.contains(&"quote"),
2015            "synthetic 'quote' should not be in symbol_spans"
2016        );
2017        assert!(names.contains(&"a"));
2018        assert!(names.contains(&"b"));
2019    }
2020
2021    #[test]
2022    fn test_symbol_spans_multiple_forms() {
2023        let (_, _, sym_spans) =
2024            read_many_with_symbol_spans("(define x 1)\n(defun f (a) a)").unwrap();
2025        let names: Vec<&str> = sym_spans.iter().map(|(n, _)| n.as_str()).collect();
2026        assert!(names.contains(&"define"));
2027        assert!(names.contains(&"x"));
2028        assert!(names.contains(&"defun"));
2029        assert!(names.contains(&"f"));
2030        assert!(names.contains(&"a"));
2031        // "a" should appear twice (param + body reference)
2032        assert_eq!(names.iter().filter(|&&n| n == "a").count(), 2);
2033    }
2034
2035    #[test]
2036    fn test_symbol_spans_nil_excluded() {
2037        // "nil" parses as Value::nil(), not a symbol — should not be in symbol_spans
2038        let (_, _, sym_spans) = read_many_with_symbol_spans("nil").unwrap();
2039        assert!(sym_spans.is_empty());
2040    }
2041}