Skip to main content

sema_reader/
reader.rs

1use std::collections::BTreeMap;
2use std::rc::Rc;
3
4use sema_core::{resolve, SemaError, Span, SpanMap, Value, ValueView};
5
6use crate::lexer::{tokenize, FStringPart, SpannedToken, Token};
7
/// Maximum nesting depth for parsing. Untrusted input (files, the WASM
/// playground, f-string interpolations) must not be able to overflow the thread
/// stack via thousands of nested forms. 1024 is far beyond any real program.
///
/// Enforced in `Parser::parse_expr`, which counts one level per nested call and
/// returns a reader error once the limit is exceeded.
///
/// NOTE(review): `Parser::parse_atom` parses each f-string interpolation with a
/// fresh `read` call, so the counter restarts at zero inside an interpolation
/// rather than continuing from the enclosing form's depth.
const MAX_PARSE_DEPTH: usize = 1024;
12
/// Recursive-descent reader state: a cursor over a token stream plus the side
/// tables that are filled in while parsing.
struct Parser {
    /// Token stream being parsed. It may contain `Comment` and `Newline`
    /// tokens, which `peek`, `span`, and `advance` skip over.
    tokens: Vec<SpannedToken>,
    /// Index into `tokens` of the next token to consume.
    pos: usize,
    /// Source span of each parsed list/vector, keyed by the address of the
    /// `Rc` that backs it.
    span_map: SpanMap,
    /// `(name, span)` for every symbol atom parsed, in the order encountered.
    symbol_spans: Vec<(String, Span)>,
    /// Current `parse_expr` recursion depth, checked against `MAX_PARSE_DEPTH`.
    depth: usize,
}
20
21impl Parser {
22    fn new(tokens: Vec<SpannedToken>) -> Self {
23        Parser {
24            tokens,
25            pos: 0,
26            span_map: SpanMap::new(),
27            symbol_spans: Vec::new(),
28            depth: 0,
29        }
30    }
31
32    fn peek(&self) -> Option<&Token> {
33        let mut pos = self.pos;
34        while let Some(t) = self.tokens.get(pos) {
35            match &t.token {
36                Token::Comment(_) | Token::Newline => pos += 1,
37                _ => return Some(&t.token),
38            }
39        }
40        None
41    }
42
43    fn span(&self) -> Span {
44        let mut pos = self.pos;
45        while let Some(t) = self.tokens.get(pos) {
46            match &t.token {
47                Token::Comment(_) | Token::Newline => pos += 1,
48                _ => return t.span,
49            }
50        }
51        Span::point(0, 0)
52    }
53
54    fn skip_trivia(&mut self) {
55        while let Some(t) = self.tokens.get(self.pos) {
56            match &t.token {
57                Token::Comment(_) | Token::Newline => self.pos += 1,
58                _ => break,
59            }
60        }
61    }
62
63    fn advance(&mut self) -> Option<&SpannedToken> {
64        self.skip_trivia();
65        let tok = self.tokens.get(self.pos);
66        if tok.is_some() {
67            self.pos += 1;
68        }
69        tok
70    }
71
72    fn expect(&mut self, expected: &Token) -> Result<(), SemaError> {
73        let span = self.span();
74        match self.advance() {
75            Some(t) if &t.token == expected => Ok(()),
76            Some(t) => Err(SemaError::Reader {
77                message: format!(
78                    "expected `{}`, got `{}`",
79                    token_display(expected),
80                    token_display(&t.token)
81                ),
82                span,
83            }),
84            None => Err(SemaError::Reader {
85                message: format!("expected `{}`, got end of input", token_display(expected)),
86                span,
87            }),
88        }
89    }
90
91    fn parse_expr(&mut self) -> Result<Value, SemaError> {
92        // Bound recursion depth on the single common entry point: every nested
93        // form (list/vector/map/short-lambda elements) recurses through here.
94        self.depth += 1;
95        if self.depth > MAX_PARSE_DEPTH {
96            self.depth -= 1;
97            return Err(SemaError::Reader {
98                message: format!("input nested too deeply (limit {MAX_PARSE_DEPTH})"),
99                span: self.span(),
100            }
101            .with_hint("reduce nesting depth"));
102        }
103        let result = self.parse_expr_inner();
104        self.depth -= 1;
105        result
106    }
107
108    fn parse_expr_inner(&mut self) -> Result<Value, SemaError> {
109        let span = self.span();
110        match self.peek() {
111            None => Err(SemaError::Reader {
112                message: "unexpected end of input".to_string(),
113                span,
114            }),
115            Some(Token::LParen) => self.parse_list(),
116            Some(Token::LBracket) => self.parse_vector(),
117            Some(Token::LBrace) => self.parse_map(),
118            Some(Token::Quote) => {
119                self.advance();
120                let inner = self.parse_expr().map_err(|_| {
121                    SemaError::Reader {
122                        message: "quote (') requires an expression after it".to_string(),
123                        span,
124                    }
125                    .with_hint("e.g. '(1 2 3) or 'foo")
126                })?;
127                self.make_list_with_span(vec![Value::symbol("quote"), inner], span)
128            }
129            Some(Token::Quasiquote) => {
130                self.advance();
131                let inner = self.parse_expr().map_err(|_| {
132                    SemaError::Reader {
133                        message: "quasiquote (`) requires an expression after it".to_string(),
134                        span,
135                    }
136                    .with_hint("e.g. `(list ,x)")
137                })?;
138                self.make_list_with_span(vec![Value::symbol("quasiquote"), inner], span)
139            }
140            Some(Token::Unquote) => {
141                self.advance();
142                let inner = self.parse_expr().map_err(|_| {
143                    SemaError::Reader {
144                        message: "unquote (,) requires an expression after it".to_string(),
145                        span,
146                    }
147                    .with_hint("use inside quasiquote, e.g. `(list ,x)")
148                })?;
149                self.make_list_with_span(vec![Value::symbol("unquote"), inner], span)
150            }
151            Some(Token::UnquoteSplice) => {
152                self.advance();
153                let inner = self.parse_expr().map_err(|_| {
154                    SemaError::Reader {
155                        message: "unquote-splicing (,@) requires an expression after it"
156                            .to_string(),
157                        span,
158                    }
159                    .with_hint("use inside quasiquote, e.g. `(list ,@xs)")
160                })?;
161                self.make_list_with_span(vec![Value::symbol("unquote-splicing"), inner], span)
162            }
163            Some(Token::BytevectorStart) => self.parse_bytevector(),
164            Some(Token::ShortLambdaStart) => self.parse_short_lambda(),
165            Some(_) => {
166                let val = self.parse_atom()?;
167                if let Some(name) = val.as_symbol() {
168                    self.symbol_spans.push((name, span));
169                }
170                Ok(val)
171            }
172        }
173    }
174
175    fn make_list_with_span(&mut self, items: Vec<Value>, span: Span) -> Result<Value, SemaError> {
176        let rc = Rc::new(items);
177        let ptr = Rc::as_ptr(&rc) as usize;
178        self.span_map.insert(ptr, span);
179        Ok(Value::list_from_rc(rc))
180    }
181
182    /// Get the span of the previously consumed token (the one at pos-1).
183    fn prev_span(&self) -> Span {
184        if self.pos > 0 {
185            self.tokens[self.pos - 1].span
186        } else {
187            Span::point(0, 0)
188        }
189    }
190
191    fn parse_list(&mut self) -> Result<Value, SemaError> {
192        let open_span = self.span();
193        self.expect(&Token::LParen)?;
194        let mut items = Vec::new();
195        while self.peek() != Some(&Token::RParen) {
196            if self.peek().is_none() {
197                return Err(SemaError::Reader {
198                    message: "unterminated list".to_string(),
199                    span: open_span,
200                }
201                .with_hint("add a closing `)`"));
202            }
203            if self.peek() == Some(&Token::RBracket) {
204                return Err(SemaError::Reader {
205                    message: "mismatched bracket: expected `)` to close `(`, found `]`".to_string(),
206                    span: self.span(),
207                }
208                .with_hint("this list was opened with `(` — close it with `)`"));
209            }
210            if self.peek() == Some(&Token::RBrace) {
211                return Err(SemaError::Reader {
212                    message: "mismatched bracket: expected `)` to close `(`, found `}`".to_string(),
213                    span: self.span(),
214                }
215                .with_hint("this list was opened with `(` — close it with `)`"));
216            }
217            // Handle dotted pairs: (a . b)
218            if self.peek() == Some(&Token::Dot) {
219                self.advance(); // skip dot
220                let cdr = self.parse_expr()?;
221                self.expect(&Token::RParen)?;
222                let close = self.prev_span();
223                items.push(Value::symbol("."));
224                items.push(cdr);
225                return self.make_list_with_span(items, open_span.to(&close));
226            }
227            items.push(self.parse_expr()?);
228        }
229        self.expect(&Token::RParen)?;
230        let close = self.prev_span();
231        self.make_list_with_span(items, open_span.to(&close))
232    }
233
234    fn parse_vector(&mut self) -> Result<Value, SemaError> {
235        let open_span = self.span();
236        self.expect(&Token::LBracket)?;
237        let mut items = Vec::new();
238        while self.peek() != Some(&Token::RBracket) {
239            if self.peek().is_none() {
240                return Err(SemaError::Reader {
241                    message: "unterminated vector".to_string(),
242                    span: open_span,
243                }
244                .with_hint("add a closing `]`"));
245            }
246            if self.peek() == Some(&Token::RParen) {
247                return Err(SemaError::Reader {
248                    message: "mismatched bracket: expected `]` to close `[`, found `)`".to_string(),
249                    span: self.span(),
250                }
251                .with_hint("this vector was opened with `[` — close it with `]`"));
252            }
253            if self.peek() == Some(&Token::RBrace) {
254                return Err(SemaError::Reader {
255                    message: "mismatched bracket: expected `]` to close `[`, found `}`".to_string(),
256                    span: self.span(),
257                }
258                .with_hint("this vector was opened with `[` — close it with `]`"));
259            }
260            items.push(self.parse_expr()?);
261        }
262        self.expect(&Token::RBracket)?;
263        let close = self.prev_span();
264        let rc = Rc::new(items);
265        let ptr = Rc::as_ptr(&rc) as usize;
266        self.span_map.insert(ptr, open_span.to(&close));
267        Ok(Value::vector_from_rc(rc))
268    }
269
270    fn parse_map(&mut self) -> Result<Value, SemaError> {
271        let open_span = self.span();
272        self.expect(&Token::LBrace)?;
273        let mut map = BTreeMap::new();
274        while self.peek() != Some(&Token::RBrace) {
275            if self.peek().is_none() {
276                return Err(SemaError::Reader {
277                    message: "unterminated map".to_string(),
278                    span: open_span,
279                }
280                .with_hint("add a closing `}`"));
281            }
282            if self.peek() == Some(&Token::RParen) {
283                return Err(SemaError::Reader {
284                    message: "mismatched bracket: expected `}` to close `{`, found `)`".to_string(),
285                    span: self.span(),
286                }
287                .with_hint("this map was opened with `{` — close it with `}`"));
288            }
289            if self.peek() == Some(&Token::RBracket) {
290                return Err(SemaError::Reader {
291                    message: "mismatched bracket: expected `}` to close `{`, found `]`".to_string(),
292                    span: self.span(),
293                }
294                .with_hint("this map was opened with `{` — close it with `}`"));
295            }
296            let key = self.parse_expr()?;
297            if self.peek() == Some(&Token::RBrace) || self.peek().is_none() {
298                return Err(SemaError::Reader {
299                    message: "map literal must have even number of forms".to_string(),
300                    span: self.span(),
301                });
302            }
303            let val = self.parse_expr()?;
304            map.insert(key, val);
305        }
306        self.expect(&Token::RBrace)?;
307        Ok(Value::map(map))
308    }
309
310    fn parse_bytevector(&mut self) -> Result<Value, SemaError> {
311        let open_span = self.span();
312        self.advance(); // consume BytevectorStart token
313        let mut bytes = Vec::new();
314        while self.peek() != Some(&Token::RParen) {
315            if self.peek().is_none() {
316                return Err(SemaError::Reader {
317                    message: "unterminated bytevector".to_string(),
318                    span: open_span,
319                }
320                .with_hint("add a closing `)`"));
321            }
322            let span = self.span();
323            match self.peek() {
324                Some(Token::Int(n)) => {
325                    let n = *n;
326                    self.advance();
327                    if !(0..=255).contains(&n) {
328                        return Err(SemaError::Reader {
329                            message: format!("#u8(...): byte value {n} out of range 0..255"),
330                            span,
331                        });
332                    }
333                    bytes.push(n as u8);
334                }
335                _ => {
336                    return Err(SemaError::Reader {
337                        message: "#u8(...): expected integer byte value".to_string(),
338                        span,
339                    });
340                }
341            }
342        }
343        self.expect(&Token::RParen)?;
344        Ok(Value::bytevector(bytes))
345    }
346
347    fn parse_short_lambda(&mut self) -> Result<Value, SemaError> {
348        let open_span = self.span();
349        self.advance(); // consume ShortLambdaStart
350        let mut body_items = Vec::new();
351        while self.peek() != Some(&Token::RParen) {
352            if self.peek().is_none() {
353                return Err(SemaError::Reader {
354                    message: "unterminated short lambda #(...)".to_string(),
355                    span: open_span,
356                }
357                .with_hint("add a closing `)`"));
358            }
359            body_items.push(self.parse_expr()?);
360        }
361        self.expect(&Token::RParen)?;
362
363        // Build the body as a single list form: (fn-name arg1 arg2 ...)
364        let body = Value::list(body_items);
365
366        // Scan body for % / %1 / %2 etc., rewrite % → %1
367        let mut max_arg: usize = 0;
368        let body = rewrite_percent_args(&body, &mut max_arg);
369
370        // Build parameter list
371        let params: Vec<Value> = if max_arg == 0 {
372            vec![]
373        } else {
374            (1..=max_arg)
375                .map(|n| Value::symbol(&format!("%{}", n)))
376                .collect()
377        };
378
379        Ok(Value::list(vec![
380            Value::symbol("lambda"),
381            Value::list(params),
382            body,
383        ]))
384    }
385
386    /// After a parse error, skip tokens until we reach a position that
387    /// could plausibly start a new top-level expression (depth-0 open bracket,
388    /// quote, or atom). This enables error recovery in `read_many_recover`.
389    fn recover_to_next_expr(&mut self) {
390        let mut depth: usize = 0;
391        while let Some(tok) = self.peek() {
392            match tok {
393                // Opening brackets increase depth
394                Token::LParen
395                | Token::LBracket
396                | Token::LBrace
397                | Token::ShortLambdaStart
398                | Token::BytevectorStart => {
399                    if depth == 0 {
400                        // This could start a new top-level form — stop here
401                        return;
402                    }
403                    self.advance();
404                    depth += 1;
405                }
406                // Closing brackets decrease depth
407                Token::RParen | Token::RBracket | Token::RBrace => {
408                    if depth == 0 {
409                        // Stray closer at top level — stop and let parse_expr report it
410                        return;
411                    }
412                    self.advance();
413                    depth -= 1;
414                }
415                // Quote-like prefixes at depth 0 could start a new form
416                Token::Quote | Token::Quasiquote | Token::Unquote | Token::UnquoteSplice => {
417                    if depth == 0 {
418                        return;
419                    }
420                    self.advance();
421                }
422                // Atoms at depth 0 could be a top-level expression
423                _ => {
424                    if depth == 0 {
425                        return;
426                    }
427                    self.advance();
428                }
429            }
430        }
431    }
432
433    fn parse_atom(&mut self) -> Result<Value, SemaError> {
434        let span = self.span();
435        match self.advance() {
436            Some(SpannedToken {
437                token: Token::Int(n),
438                ..
439            }) => Ok(Value::int(*n)),
440            Some(SpannedToken {
441                token: Token::Float(f),
442                ..
443            }) => Ok(Value::float(*f)),
444            Some(SpannedToken {
445                token: Token::String(s),
446                ..
447            }) => Ok(Value::string(s)),
448            Some(SpannedToken {
449                token: Token::Regex(s),
450                ..
451            }) => Ok(Value::string(s)),
452            Some(SpannedToken {
453                token: Token::Symbol(s),
454                ..
455            }) => {
456                if s == "nil" {
457                    Ok(Value::nil())
458                } else {
459                    Ok(Value::symbol(s))
460                }
461            }
462            Some(SpannedToken {
463                token: Token::Keyword(s),
464                ..
465            }) => Ok(Value::keyword(s)),
466            Some(SpannedToken {
467                token: Token::Bool(b),
468                ..
469            }) => Ok(Value::bool(*b)),
470            Some(SpannedToken {
471                token: Token::Char(c),
472                ..
473            }) => Ok(Value::char(*c)),
474            Some(SpannedToken {
475                token: Token::FString(parts),
476                ..
477            }) => {
478                let parts = parts.clone();
479                let mut items = vec![Value::symbol("str")];
480                for part in &parts {
481                    match part {
482                        FStringPart::Literal(s) => {
483                            if !s.is_empty() {
484                                items.push(Value::string(s));
485                            }
486                        }
487                        FStringPart::Expr(src) => {
488                            let val = read(src)?;
489                            items.push(val);
490                        }
491                    }
492                }
493                Ok(Value::list(items))
494            }
495            Some(t) => {
496                let (name, hint) = match &t.token {
497                    Token::RParen => (
498                        "unexpected closing `)`",
499                        Some("no matching opening parenthesis"),
500                    ),
501                    Token::RBracket => (
502                        "unexpected closing `]`",
503                        Some("no matching opening bracket"),
504                    ),
505                    Token::RBrace => ("unexpected closing `}`", Some("no matching opening brace")),
506                    Token::Dot => (
507                        "unexpected `.`",
508                        Some("dots are used in pair notation, e.g. (a . b)"),
509                    ),
510                    _ => ("unexpected token", None),
511                };
512                let err = SemaError::Reader {
513                    message: name.to_string(),
514                    span,
515                };
516                Err(if let Some(h) = hint {
517                    err.with_hint(h)
518                } else {
519                    err
520                })
521            }
522            None => Err(SemaError::Reader {
523                message: "unexpected end of input".to_string(),
524                span,
525            }),
526        }
527    }
528}
529
530fn token_display(tok: &Token) -> &'static str {
531    match tok {
532        Token::LParen => "(",
533        Token::RParen => ")",
534        Token::LBracket => "[",
535        Token::RBracket => "]",
536        Token::LBrace => "{",
537        Token::RBrace => "}",
538        Token::Quote => "'",
539        Token::Quasiquote => "`",
540        Token::Unquote => ",",
541        Token::UnquoteSplice => ",@",
542        Token::Dot => ".",
543        Token::BytevectorStart => "#u8(",
544        Token::Int(_) => "integer",
545        Token::Float(_) => "float",
546        Token::String(_) => "string",
547        Token::Symbol(_) => "symbol",
548        Token::Keyword(_) => "keyword",
549        Token::Bool(_) => "boolean",
550        Token::Char(_) => "character",
551        Token::FString(_) => "f-string",
552        Token::ShortLambdaStart => "#(",
553        Token::Comment(_) => "comment",
554        Token::Newline => "newline",
555        Token::Regex(_) => "regex",
556    }
557}
558
559/// Recursively scan a Value AST for `%`, `%1`, `%2`, etc. symbols.
560/// Rewrites bare `%` to `%1`. Tracks the highest numbered arg in `max_arg`.
561/// Skips recursion into nested `(lambda ...)` / `(fn ...)` forms.
562fn rewrite_percent_args(expr: &Value, max_arg: &mut usize) -> Value {
563    match expr.view() {
564        ValueView::Symbol(spur) => {
565            let name = resolve(spur);
566            if name == "%" {
567                *max_arg = (*max_arg).max(1);
568                Value::symbol("%1")
569            } else if let Some(rest) = name.strip_prefix('%') {
570                if let Ok(n) = rest.parse::<usize>() {
571                    if n > 0 {
572                        *max_arg = (*max_arg).max(n);
573                    }
574                }
575                expr.clone()
576            } else {
577                expr.clone()
578            }
579        }
580        ValueView::List(items) => {
581            // Skip nested (lambda ...) / (fn ...) forms — their % args are their own
582            if let Some(first) = items.first() {
583                if let ValueView::Symbol(s) = first.view() {
584                    let name = resolve(s);
585                    if name == "lambda" || name == "fn" {
586                        return expr.clone();
587                    }
588                }
589            }
590            let new_items: Vec<Value> = items
591                .iter()
592                .map(|item| rewrite_percent_args(item, max_arg))
593                .collect();
594            Value::list(new_items)
595        }
596        ValueView::Vector(items) => {
597            let new_items: Vec<Value> = items
598                .iter()
599                .map(|item| rewrite_percent_args(item, max_arg))
600                .collect();
601            Value::vector(new_items)
602        }
603        _ => expr.clone(),
604    }
605}
606
607/// Read a single s-expression from a string.
608pub fn read(input: &str) -> Result<Value, SemaError> {
609    let tokens = tokenize(input)?;
610    let mut parser = Parser::new(tokens);
611    if parser.peek().is_none() {
612        return Ok(Value::nil());
613    }
614    parser.parse_expr()
615}
616
617/// Read all s-expressions from a string.
618pub fn read_many(input: &str) -> Result<Vec<Value>, SemaError> {
619    let tokens = tokenize(input)?;
620    let mut parser = Parser::new(tokens);
621    let mut exprs = Vec::new();
622    while parser.peek().is_some() {
623        exprs.push(parser.parse_expr()?);
624    }
625    Ok(exprs)
626}
627
628/// Read all s-expressions and return the accumulated span map.
629pub fn read_many_with_spans(input: &str) -> Result<(Vec<Value>, SpanMap), SemaError> {
630    let tokens = tokenize(input)?;
631    let mut parser = Parser::new(tokens);
632    let mut exprs = Vec::new();
633    while parser.peek().is_some() {
634        exprs.push(parser.parse_expr()?);
635    }
636    Ok((exprs, parser.span_map))
637}
638
639/// Read all s-expressions and return spans for both compound expressions and individual symbols.
640/// Symbol spans enable precise go-to-definition (jumping to the name, not the whole form).
641#[allow(clippy::type_complexity)]
642pub fn read_many_with_symbol_spans(
643    input: &str,
644) -> Result<(Vec<Value>, SpanMap, Vec<(String, Span)>), SemaError> {
645    let tokens = tokenize(input)?;
646    let mut parser = Parser::new(tokens);
647    let mut exprs = Vec::new();
648    while parser.peek().is_some() {
649        exprs.push(parser.parse_expr()?);
650    }
651    Ok((exprs, parser.span_map, parser.symbol_spans))
652}
653
654/// Read all s-expressions with error recovery.
655/// On parse errors, skips to the next top-level form and continues.
656/// Returns (successfully parsed forms, span map, collected errors).
657/// Tokenizer errors are returned as a single error with no parsed forms.
658#[allow(clippy::type_complexity)]
659pub fn read_many_with_spans_recover(
660    input: &str,
661) -> (Vec<Value>, SpanMap, Vec<(String, Span)>, Vec<SemaError>) {
662    let tokens = match tokenize(input) {
663        Ok(t) => t,
664        Err(e) => return (vec![], SpanMap::new(), vec![], vec![e]),
665    };
666    let mut parser = Parser::new(tokens);
667    let mut exprs = Vec::new();
668    let mut errors = Vec::new();
669    while parser.peek().is_some() {
670        match parser.parse_expr() {
671            Ok(expr) => exprs.push(expr),
672            Err(err) => {
673                errors.push(err);
674                parser.recover_to_next_expr();
675            }
676        }
677    }
678    (exprs, parser.span_map, parser.symbol_spans, errors)
679}
680
681#[cfg(test)]
682mod tests {
683    use super::*;
684
685    #[test]
686    fn test_read_int() {
687        assert_eq!(read("42").unwrap(), Value::int(42));
688    }
689
690    #[test]
691    fn deeply_nested_input_errors_instead_of_overflowing() {
692        // Untrusted input with thousands of levels of nesting must return a
693        // reader error rather than recurse to a stack overflow. Run on a large
694        // stack so the result reflects the depth-limit check, not the small
695        // default test-thread stack (which would SIGSEGV either way).
696        let result = std::thread::Builder::new()
697            .stack_size(16 * 1024 * 1024)
698            .spawn(|| {
699                let depth = 3000;
700                let src = format!("{}{}", "[".repeat(depth), "]".repeat(depth));
701                read(&src).is_err()
702            })
703            .unwrap()
704            .join()
705            .expect("parser must not overflow the stack on deeply nested input");
706        assert!(
707            result,
708            "expected a depth-limit error for deeply nested input"
709        );
710    }
711
712    #[test]
713    fn test_read_negative_int() {
714        assert_eq!(read("-7").unwrap(), Value::int(-7));
715    }
716
717    #[test]
718    fn test_read_float() {
719        assert_eq!(read("3.14").unwrap(), Value::float(3.14));
720    }
721
722    #[test]
723    fn test_read_string() {
724        assert_eq!(read("\"hello\"").unwrap(), Value::string("hello"));
725    }
726
727    #[test]
728    fn test_read_symbol() {
729        assert_eq!(read("foo").unwrap(), Value::symbol("foo"));
730    }
731
732    #[test]
733    fn test_read_keyword() {
734        assert_eq!(read(":bar").unwrap(), Value::keyword("bar"));
735    }
736
737    #[test]
738    fn test_read_bool() {
739        assert_eq!(read("#t").unwrap(), Value::bool(true));
740        assert_eq!(read("#f").unwrap(), Value::bool(false));
741    }
742
743    #[test]
744    fn test_read_list() {
745        let result = read("(+ 1 2)").unwrap();
746        assert_eq!(
747            result,
748            Value::list(vec![Value::symbol("+"), Value::int(1), Value::int(2)])
749        );
750    }
751
752    #[test]
753    fn test_read_nested_list() {
754        let result = read("(* (+ 1 2) 3)").unwrap();
755        assert_eq!(
756            result,
757            Value::list(vec![
758                Value::symbol("*"),
759                Value::list(vec![Value::symbol("+"), Value::int(1), Value::int(2)]),
760                Value::int(3)
761            ])
762        );
763    }
764
765    #[test]
766    fn test_read_vector() {
767        let result = read("[1 2 3]").unwrap();
768        assert_eq!(
769            result,
770            Value::vector(vec![Value::int(1), Value::int(2), Value::int(3)])
771        );
772    }
773
774    #[test]
775    fn test_read_map() {
776        let result = read("{:a 1 :b 2}").unwrap();
777        let mut expected = BTreeMap::new();
778        expected.insert(Value::keyword("a"), Value::int(1));
779        expected.insert(Value::keyword("b"), Value::int(2));
780        assert_eq!(result, Value::map(expected));
781    }
782
783    #[test]
784    fn test_read_quote() {
785        let result = read("'foo").unwrap();
786        assert_eq!(
787            result,
788            Value::list(vec![Value::symbol("quote"), Value::symbol("foo")])
789        );
790    }
791
792    #[test]
793    fn test_read_quasiquote() {
794        let result = read("`(a ,b ,@c)").unwrap();
795        assert_eq!(
796            result,
797            Value::list(vec![
798                Value::symbol("quasiquote"),
799                Value::list(vec![
800                    Value::symbol("a"),
801                    Value::list(vec![Value::symbol("unquote"), Value::symbol("b")]),
802                    Value::list(vec![Value::symbol("unquote-splicing"), Value::symbol("c")]),
803                ])
804            ])
805        );
806    }
807
808    #[test]
809    fn test_read_nil() {
810        assert_eq!(read("nil").unwrap(), Value::nil());
811    }
812
813    #[test]
814    fn test_read_many_exprs() {
815        let results = read_many("1 2 3").unwrap();
816        assert_eq!(results, vec![Value::int(1), Value::int(2), Value::int(3)]);
817    }
818
819    #[test]
820    fn test_comments() {
821        let result = read_many("; comment\n(+ 1 2)").unwrap();
822        assert_eq!(result.len(), 1);
823    }
824
825    #[test]
826    fn test_read_zero() {
827        assert_eq!(read("0").unwrap(), Value::int(0));
828    }
829
830    #[test]
831    fn test_read_negative_zero() {
832        assert_eq!(read("-0").unwrap(), Value::int(0));
833    }
834
835    #[test]
836    fn test_read_leading_zeros() {
837        assert_eq!(read("007").unwrap(), Value::int(7));
838    }
839
840    #[test]
841    fn test_read_large_int() {
842        assert_eq!(read("9999999999999").unwrap(), Value::int(9999999999999));
843    }
844
845    #[test]
846    fn test_read_int_overflow() {
847        // i64::MAX + 1 should error, not silently wrap
848        assert!(read("9999999999999999999999").is_err());
849    }
850
851    #[test]
852    fn test_read_negative_float() {
853        assert_eq!(read("-2.5").unwrap(), Value::float(-2.5));
854    }
855
856    #[test]
857    fn test_read_float_leading_zero() {
858        assert_eq!(read("0.5").unwrap(), Value::float(0.5));
859    }
860
861    #[test]
862    fn test_read_minus_is_symbol() {
863        // Bare `-` should be a symbol (subtraction operator), not a number
864        assert_eq!(read("-").unwrap(), Value::symbol("-"));
865    }
866
867    #[test]
868    fn test_read_minus_in_list() {
869        // `(- 3)` should parse as call to `-` with arg 3
870        let result = read("(- 3)").unwrap();
871        assert_eq!(result, Value::list(vec![Value::symbol("-"), Value::int(3)]));
872    }
873
874    #[test]
875    fn test_read_negative_in_list() {
876        // `(-3)` should parse as list containing -3
877        let result = read("(-3)").unwrap();
878        assert_eq!(result, Value::list(vec![Value::int(-3)]));
879    }
880
881    #[test]
882    fn test_read_empty_string() {
883        assert_eq!(read(r#""""#).unwrap(), Value::string(""));
884    }
885
886    #[test]
887    fn test_read_string_with_escapes() {
888        assert_eq!(
889            read(r#""\n\t\r\\\"" "#).unwrap(),
890            Value::string("\n\t\r\\\"")
891        );
892    }
893
894    #[test]
895    fn test_read_string_unknown_escape() {
896        // Unknown escape sequences are preserved literally
897        assert_eq!(read(r#""\z""#).unwrap(), Value::string("\\z"));
898    }
899
900    #[test]
901    fn test_read_string_with_newline() {
902        assert_eq!(
903            read("\"line1\nline2\"").unwrap(),
904            Value::string("line1\nline2")
905        );
906    }
907
908    #[test]
909    fn test_read_unterminated_string() {
910        assert!(read("\"hello").is_err());
911    }
912
913    #[test]
914    fn test_read_string_escaped_quote_at_end() {
915        // `"test\"` — the backslash escapes the quote, string is unterminated
916        assert!(read(r#""test\""#).is_err());
917    }
918
919    #[test]
920    fn test_read_string_with_unicode() {
921        assert_eq!(read("\"héllo\"").unwrap(), Value::string("héllo"));
922        assert_eq!(read("\"日本語\"").unwrap(), Value::string("日本語"));
923        assert_eq!(read("\"🎉\"").unwrap(), Value::string("🎉"));
924    }
925
926    #[test]
927    fn test_read_string_with_parens() {
928        assert_eq!(read("\"(+ 1 2)\"").unwrap(), Value::string("(+ 1 2)"));
929    }
930
931    #[test]
932    fn test_read_operator_symbols() {
933        assert_eq!(read("+").unwrap(), Value::symbol("+"));
934        assert_eq!(read("*").unwrap(), Value::symbol("*"));
935        assert_eq!(read("/").unwrap(), Value::symbol("/"));
936        assert_eq!(read("<=").unwrap(), Value::symbol("<="));
937        assert_eq!(read(">=").unwrap(), Value::symbol(">="));
938    }
939
940    #[test]
941    fn test_read_predicate_symbols() {
942        assert_eq!(read("null?").unwrap(), Value::symbol("null?"));
943        assert_eq!(read("list?").unwrap(), Value::symbol("list?"));
944    }
945
946    #[test]
947    fn test_read_arrow_symbols() {
948        assert_eq!(
949            read("string->symbol").unwrap(),
950            Value::symbol("string->symbol")
951        );
952    }
953
954    #[test]
955    fn test_read_namespaced_symbols() {
956        assert_eq!(read("file/read").unwrap(), Value::symbol("file/read"));
957        assert_eq!(read("http/get").unwrap(), Value::symbol("http/get"));
958    }
959
960    #[test]
961    fn test_read_true_false_as_bool() {
962        assert_eq!(read("true").unwrap(), Value::bool(true));
963        assert_eq!(read("false").unwrap(), Value::bool(false));
964    }
965
966    #[test]
967    fn test_read_bare_colon_error() {
968        // `:` alone without a name should error
969        assert!(read(":").is_err());
970    }
971
972    #[test]
973    fn test_read_keyword_with_numbers() {
974        assert_eq!(read(":foo123").unwrap(), Value::keyword("foo123"));
975    }
976
977    #[test]
978    fn test_read_keyword_with_hyphens() {
979        assert_eq!(read(":max-turns").unwrap(), Value::keyword("max-turns"));
980    }
981
982    #[test]
983    fn test_read_hash_invalid() {
984        assert!(read("#x").is_err());
985        assert!(read("#").is_err());
986    }
987
988    #[test]
989    fn test_read_empty() {
990        assert_eq!(read("").unwrap(), Value::nil());
991    }
992
993    #[test]
994    fn test_read_whitespace_only() {
995        assert_eq!(read("   \n\t  ").unwrap(), Value::nil());
996    }
997
998    #[test]
999    fn test_read_many_empty() {
1000        assert_eq!(read_many("").unwrap(), vec![]);
1001    }
1002
1003    #[test]
1004    fn test_read_many_whitespace_only() {
1005        assert_eq!(read_many("  \n  ").unwrap(), vec![]);
1006    }
1007
1008    #[test]
1009    fn test_read_comment_only() {
1010        assert_eq!(read_many("; just a comment").unwrap(), vec![]);
1011    }
1012
1013    #[test]
1014    fn test_read_empty_list() {
1015        assert_eq!(read("()").unwrap(), Value::list(vec![]));
1016    }
1017
1018    #[test]
1019    fn test_read_deeply_nested() {
1020        let result = read("((((42))))").unwrap();
1021        assert_eq!(
1022            result,
1023            Value::list(vec![Value::list(vec![Value::list(vec![Value::list(
1024                vec![Value::int(42)]
1025            )])])])
1026        );
1027    }
1028
1029    #[test]
1030    fn test_read_unterminated_list() {
1031        assert!(read("(1 2").is_err());
1032    }
1033
1034    #[test]
1035    fn test_read_extra_rparen() {
1036        // `read` only reads one expr, so extra `)` is just ignored (not consumed)
1037        // But `read_many` should fail since `)` is not a valid expr start
1038        let result = read("42").unwrap();
1039        assert_eq!(result, Value::int(42));
1040    }
1041
1042    #[test]
1043    fn test_read_dotted_pair() {
1044        let result = read("(a . b)").unwrap();
1045        assert_eq!(
1046            result,
1047            Value::list(vec![
1048                Value::symbol("a"),
1049                Value::symbol("."),
1050                Value::symbol("b")
1051            ])
1052        );
1053    }
1054
1055    #[test]
1056    fn test_read_empty_vector() {
1057        assert_eq!(read("[]").unwrap(), Value::vector(vec![]));
1058    }
1059
1060    #[test]
1061    fn test_read_unterminated_vector() {
1062        assert!(read("[1 2").is_err());
1063    }
1064
1065    #[test]
1066    fn test_read_empty_map() {
1067        assert_eq!(read("{}").unwrap(), Value::map(BTreeMap::new()));
1068    }
1069
1070    #[test]
1071    fn test_read_unterminated_map() {
1072        assert!(read("{:a 1").is_err());
1073    }
1074
1075    #[test]
1076    fn test_read_map_odd_elements() {
1077        assert!(read("{:a 1 :b}").is_err());
1078    }
1079
1080    #[test]
1081    fn test_read_map_duplicate_keys() {
1082        // Later key wins (BTreeMap insert replaces)
1083        let result = read("{:a 1 :a 2}").unwrap();
1084        let mut expected = BTreeMap::new();
1085        expected.insert(Value::keyword("a"), Value::int(2));
1086        assert_eq!(result, Value::map(expected));
1087    }
1088
1089    #[test]
1090    fn test_read_nested_quote() {
1091        let result = read("''foo").unwrap();
1092        assert_eq!(
1093            result,
1094            Value::list(vec![
1095                Value::symbol("quote"),
1096                Value::list(vec![Value::symbol("quote"), Value::symbol("foo")])
1097            ])
1098        );
1099    }
1100
1101    #[test]
1102    fn test_read_quote_list() {
1103        let result = read("'(1 2 3)").unwrap();
1104        assert_eq!(
1105            result,
1106            Value::list(vec![
1107                Value::symbol("quote"),
1108                Value::list(vec![Value::int(1), Value::int(2), Value::int(3)])
1109            ])
1110        );
1111    }
1112
1113    #[test]
1114    fn test_read_quote_at_eof() {
1115        assert!(read("'").is_err());
1116    }
1117
1118    #[test]
1119    fn test_read_unquote_at_eof() {
1120        assert!(read(",").is_err());
1121    }
1122
1123    #[test]
1124    fn test_read_unquote_splice_at_eof() {
1125        assert!(read(",@").is_err());
1126    }
1127
1128    #[test]
1129    fn test_read_quasiquote_at_eof() {
1130        assert!(read("`").is_err());
1131    }
1132
1133    #[test]
1134    fn test_read_comment_after_expr() {
1135        assert_eq!(read_many("42 ; comment").unwrap(), vec![Value::int(42)]);
1136    }
1137
1138    #[test]
1139    fn test_read_multiple_comments() {
1140        let result = read_many("; first\n; second\n42").unwrap();
1141        assert_eq!(result, vec![Value::int(42)]);
1142    }
1143
1144    #[test]
1145    fn test_read_comment_no_newline() {
1146        // Comment at end of input without trailing newline
1147        assert_eq!(read_many("; comment").unwrap(), vec![]);
1148    }
1149
1150    #[test]
1151    fn test_read_crlf_line_endings() {
1152        let result = read_many("1\r\n2\r\n3").unwrap();
1153        assert_eq!(result, vec![Value::int(1), Value::int(2), Value::int(3)]);
1154    }
1155
1156    #[test]
1157    fn test_read_tabs_as_whitespace() {
1158        assert_eq!(
1159            read("(\t+\t1\t2\t)").unwrap(),
1160            Value::list(vec![Value::symbol("+"), Value::int(1), Value::int(2)])
1161        );
1162    }
1163
1164    #[test]
1165    fn test_read_mixed_collections() {
1166        // List containing vector and map
1167        let result = read("([1 2] {:a 3})").unwrap();
1168        let mut map = BTreeMap::new();
1169        map.insert(Value::keyword("a"), Value::int(3));
1170        assert_eq!(
1171            result,
1172            Value::list(vec![
1173                Value::vector(vec![Value::int(1), Value::int(2)]),
1174                Value::map(map)
1175            ])
1176        );
1177    }
1178
1179    #[test]
1180    fn test_read_many_mixed_types() {
1181        let result = read_many(r#"42 3.14 "hello" foo :bar #t nil"#).unwrap();
1182        assert_eq!(result.len(), 7);
1183        assert_eq!(result[0], Value::int(42));
1184        assert_eq!(result[1], Value::float(3.14));
1185        assert_eq!(result[2], Value::string("hello"));
1186        assert_eq!(result[3], Value::symbol("foo"));
1187        assert_eq!(result[4], Value::keyword("bar"));
1188        assert_eq!(result[5], Value::bool(true));
1189        assert_eq!(result[6], Value::nil());
1190    }
1191
1192    #[test]
1193    fn test_span_map_tracks_lists() {
1194        let (exprs, spans) = read_many_with_spans("(+ 1 2)").unwrap();
1195        assert_eq!(exprs.len(), 1);
1196        // The list should have a span entry
1197        let rc = exprs[0].as_list_rc().expect("expected list");
1198        let ptr = Rc::as_ptr(&rc) as usize;
1199        let span = spans.get(&ptr).expect("list should have span");
1200        assert_eq!(span.line, 1);
1201        assert_eq!(span.col, 1);
1202    }
1203
1204    #[test]
1205    fn test_span_map_multiline() {
1206        let (exprs, spans) = read_many_with_spans("(foo)\n(bar)").unwrap();
1207        assert_eq!(exprs.len(), 2);
1208        let rc = exprs[1].as_list_rc().expect("expected list");
1209        let ptr = Rc::as_ptr(&rc) as usize;
1210        let span = spans.get(&ptr).expect("second list should have span");
1211        assert_eq!(span.line, 2);
1212        assert_eq!(span.col, 1);
1213    }
1214
1215    #[test]
1216    fn test_read_unexpected_char() {
1217        assert!(read("@").is_err());
1218        assert!(read("$").is_err());
1219    }
1220
1221    #[test]
1222    fn test_read_char_literal() {
1223        assert_eq!(read("#\\a").unwrap(), Value::char('a'));
1224        assert_eq!(read("#\\Z").unwrap(), Value::char('Z'));
1225        assert_eq!(read("#\\0").unwrap(), Value::char('0'));
1226    }
1227
1228    #[test]
1229    fn test_read_char_named() {
1230        assert_eq!(read("#\\space").unwrap(), Value::char(' '));
1231        assert_eq!(read("#\\newline").unwrap(), Value::char('\n'));
1232        assert_eq!(read("#\\tab").unwrap(), Value::char('\t'));
1233        assert_eq!(read("#\\return").unwrap(), Value::char('\r'));
1234        assert_eq!(read("#\\nul").unwrap(), Value::char('\0'));
1235    }
1236
1237    #[test]
1238    fn test_read_char_special() {
1239        assert_eq!(read("#\\(").unwrap(), Value::char('('));
1240        assert_eq!(read("#\\)").unwrap(), Value::char(')'));
1241    }
1242
1243    #[test]
1244    fn test_read_char_in_list() {
1245        let result = read("(#\\a #\\b)").unwrap();
1246        assert_eq!(
1247            result,
1248            Value::list(vec![Value::char('a'), Value::char('b')])
1249        );
1250    }
1251
1252    #[test]
1253    fn test_read_char_unknown_name() {
1254        assert!(read("#\\foobar").is_err());
1255    }
1256
1257    #[test]
1258    fn test_read_char_eof() {
1259        assert!(read("#\\").is_err());
1260    }
1261
1262    #[test]
1263    fn test_read_bytevector_literal() {
1264        assert_eq!(
1265            read("#u8(1 2 3)").unwrap(),
1266            Value::bytevector(vec![1, 2, 3])
1267        );
1268    }
1269
1270    #[test]
1271    fn test_read_bytevector_empty() {
1272        assert_eq!(read("#u8()").unwrap(), Value::bytevector(vec![]));
1273    }
1274
1275    #[test]
1276    fn test_read_bytevector_single() {
1277        assert_eq!(read("#u8(255)").unwrap(), Value::bytevector(vec![255]));
1278    }
1279
1280    #[test]
1281    fn test_read_bytevector_out_of_range() {
1282        assert!(read("#u8(256)").is_err());
1283    }
1284
1285    #[test]
1286    fn test_read_bytevector_negative() {
1287        assert!(read("#u8(-1)").is_err());
1288    }
1289
1290    #[test]
1291    fn test_read_bytevector_non_integer() {
1292        assert!(read("#u8(1.5)").is_err());
1293    }
1294
1295    #[test]
1296    fn test_read_bytevector_unterminated() {
1297        assert!(read("#u8(1 2").is_err());
1298    }
1299
1300    #[test]
1301    fn test_read_bytevector_in_list() {
1302        let result = read("(#u8(1 2) #u8(3))").unwrap();
1303        assert_eq!(
1304            result,
1305            Value::list(vec![
1306                Value::bytevector(vec![1, 2]),
1307                Value::bytevector(vec![3]),
1308            ])
1309        );
1310    }
1311
1312    #[test]
1313    fn test_read_string_hex_escape_basic() {
1314        // \x41; is 'A'
1315        let result = read(r#""\x41;""#).unwrap();
1316        assert_eq!(result, Value::string("A"));
1317    }
1318
1319    #[test]
1320    fn test_read_string_hex_escape_lowercase() {
1321        let result = read(r#""\x6c;""#).unwrap();
1322        assert_eq!(result, Value::string("l"));
1323    }
1324
1325    #[test]
1326    fn test_read_string_hex_escape_mixed_case() {
1327        let result = read(r#""\x4F;""#).unwrap();
1328        assert_eq!(result, Value::string("O"));
1329    }
1330
1331    #[test]
1332    fn test_read_string_hex_escape_esc_char() {
1333        // \x1B; is ESC (0x1b) — the main motivating use case
1334        let result = read(r#""\x1B;""#).unwrap();
1335        assert_eq!(result, Value::string("\x1B"));
1336    }
1337
1338    #[test]
1339    fn test_read_string_hex_escape_null() {
1340        let result = read(r#""\x0;""#).unwrap();
1341        assert_eq!(result, Value::string("\0"));
1342    }
1343
1344    #[test]
1345    fn test_read_string_hex_escape_unicode() {
1346        // \x3BB; is λ (Greek small letter lambda)
1347        let result = read(r#""\x3BB;""#).unwrap();
1348        assert_eq!(result, Value::string("λ"));
1349    }
1350
1351    #[test]
1352    fn test_read_string_hex_escape_emoji() {
1353        // \x1F600; is 😀
1354        let result = read(r#""\x1F600;""#).unwrap();
1355        assert_eq!(result, Value::string("😀"));
1356    }
1357
1358    #[test]
1359    fn test_read_string_hex_escape_in_context() {
1360        // Mix hex escapes with regular text and other escapes
1361        let result = read(r#""hello\x20;world""#).unwrap();
1362        assert_eq!(result, Value::string("hello world"));
1363    }
1364
1365    #[test]
1366    fn test_read_string_hex_escape_multiple() {
1367        let result = read(r#""\x48;\x69;""#).unwrap();
1368        assert_eq!(result, Value::string("Hi"));
1369    }
1370
1371    #[test]
1372    fn test_read_string_hex_escape_missing_semicolon() {
1373        assert!(read(r#""\x41""#).is_err());
1374    }
1375
1376    #[test]
1377    fn test_read_string_hex_escape_no_digits() {
1378        assert!(read(r#""\x;""#).is_err());
1379    }
1380
1381    #[test]
1382    fn test_read_string_hex_escape_invalid_hex() {
1383        assert!(read(r#""\xGG;""#).is_err());
1384    }
1385
1386    #[test]
1387    fn test_read_string_hex_escape_invalid_codepoint() {
1388        // 0xD800 is a surrogate — invalid Unicode scalar
1389        assert!(read(r#""\xD800;""#).is_err());
1390    }
1391
1392    #[test]
1393    fn test_read_string_hex_escape_too_large() {
1394        // 0x110000 is above Unicode max
1395        assert!(read(r#""\x110000;""#).is_err());
1396    }
1397
1398    #[test]
1399    fn test_read_string_u_escape_basic() {
1400        // \u0041 is 'A'
1401        let result = read(r#""\u0041""#).unwrap();
1402        assert_eq!(result, Value::string("A"));
1403    }
1404
1405    #[test]
1406    fn test_read_string_u_escape_lambda() {
1407        let result = read(r#""\u03BB""#).unwrap();
1408        assert_eq!(result, Value::string("λ"));
1409    }
1410
1411    #[test]
1412    fn test_read_string_u_escape_esc() {
1413        let result = read(r#""\u001B""#).unwrap();
1414        assert_eq!(result, Value::string("\x1B"));
1415    }
1416
1417    #[test]
1418    fn test_read_string_u_escape_too_few_digits() {
1419        assert!(read(r#""\u041""#).is_err());
1420    }
1421
1422    #[test]
1423    fn test_read_string_u_escape_surrogate() {
1424        assert!(read(r#""\uD800""#).is_err());
1425    }
1426
1427    #[test]
1428    fn test_read_string_big_u_escape_basic() {
1429        let result = read(r#""\U00000041""#).unwrap();
1430        assert_eq!(result, Value::string("A"));
1431    }
1432
1433    #[test]
1434    fn test_read_string_big_u_escape_emoji() {
1435        let result = read(r#""\U0001F600""#).unwrap();
1436        assert_eq!(result, Value::string("😀"));
1437    }
1438
1439    #[test]
1440    fn test_read_string_big_u_escape_too_few_digits() {
1441        assert!(read(r#""\U0041""#).is_err());
1442    }
1443
1444    #[test]
1445    fn test_read_string_big_u_escape_invalid() {
1446        assert!(read(r#""\U00110000""#).is_err());
1447    }
1448
1449    #[test]
1450    fn test_read_string_null_escape() {
1451        let result = read(r#""\0""#).unwrap();
1452        assert_eq!(result, Value::string("\0"));
1453    }
1454
1455    #[test]
1456    fn test_read_string_mixed_escapes() {
1457        // Mix all escape types in one string
1458        let result = read(r#""\x48;\u0069\n\t""#).unwrap();
1459        assert_eq!(result, Value::string("Hi\n\t"));
1460    }
1461
1462    #[test]
1463    fn test_read_string_ansi_escape_sequence() {
1464        // Real-world: ANSI color code ESC[31m (red)
1465        let result = read(r#""\x1B;[31mRed\x1B;[0m""#).unwrap();
1466        assert_eq!(result, Value::string("\x1B[31mRed\x1B[0m"));
1467    }
1468
1469    // ── f-string tests ──
1470
1471    #[test]
1472    fn test_read_fstring_no_interpolation() {
1473        let result = read(r#"f"hello""#).unwrap();
1474        assert_eq!(
1475            result,
1476            Value::list(vec![Value::symbol("str"), Value::string("hello")])
1477        );
1478    }
1479
1480    #[test]
1481    fn test_read_fstring_single_var() {
1482        let result = read(r#"f"hello ${name}""#).unwrap();
1483        assert_eq!(
1484            result,
1485            Value::list(vec![
1486                Value::symbol("str"),
1487                Value::string("hello "),
1488                Value::symbol("name"),
1489            ])
1490        );
1491    }
1492
1493    #[test]
1494    fn test_read_fstring_multiple_vars() {
1495        let result = read(r#"f"${a} and ${b}""#).unwrap();
1496        assert_eq!(
1497            result,
1498            Value::list(vec![
1499                Value::symbol("str"),
1500                Value::symbol("a"),
1501                Value::string(" and "),
1502                Value::symbol("b"),
1503            ])
1504        );
1505    }
1506
1507    #[test]
1508    fn test_read_fstring_expression() {
1509        let result = read(r#"f"result: ${(+ 1 2)}""#).unwrap();
1510        assert_eq!(
1511            result,
1512            Value::list(vec![
1513                Value::symbol("str"),
1514                Value::string("result: "),
1515                Value::list(vec![Value::symbol("+"), Value::int(1), Value::int(2),]),
1516            ])
1517        );
1518    }
1519
1520    #[test]
1521    fn test_read_fstring_escaped_dollar() {
1522        let result = read(r#"f"costs \$5""#).unwrap();
1523        assert_eq!(
1524            result,
1525            Value::list(vec![Value::symbol("str"), Value::string("costs $5")])
1526        );
1527    }
1528
1529    #[test]
1530    fn test_read_fstring_dollar_without_brace() {
1531        let result = read(r#"f"costs $5""#).unwrap();
1532        assert_eq!(
1533            result,
1534            Value::list(vec![Value::symbol("str"), Value::string("costs $5")])
1535        );
1536    }
1537
1538    #[test]
1539    fn test_read_fstring_escape_sequences() {
1540        let result = read(r#"f"line1\nline2""#).unwrap();
1541        assert_eq!(
1542            result,
1543            Value::list(vec![Value::symbol("str"), Value::string("line1\nline2"),])
1544        );
1545    }
1546
1547    #[test]
1548    fn test_read_fstring_empty_interpolation_error() {
1549        assert!(read(r#"f"hello ${}""#).is_err());
1550    }
1551
1552    #[test]
1553    fn test_read_fstring_unterminated_interpolation_error() {
1554        assert!(read(r#"f"hello ${name""#).is_err());
1555    }
1556
1557    #[test]
1558    fn test_read_fstring_unterminated_string_error() {
1559        assert!(read(r#"f"hello"#).is_err());
1560    }
1561
1562    #[test]
1563    fn test_read_fstring_keyword_access() {
1564        let result = read(r#"f"name: ${(:name user)}""#).unwrap();
1565        assert_eq!(
1566            result,
1567            Value::list(vec![
1568                Value::symbol("str"),
1569                Value::string("name: "),
1570                Value::list(vec![Value::keyword("name"), Value::symbol("user")]),
1571            ])
1572        );
1573    }
1574
1575    #[test]
1576    fn test_read_fstring_in_list() {
1577        let result = read(r#"(println f"hello ${name}")"#).unwrap();
1578        assert_eq!(
1579            result,
1580            Value::list(vec![
1581                Value::symbol("println"),
1582                Value::list(vec![
1583                    Value::symbol("str"),
1584                    Value::string("hello "),
1585                    Value::symbol("name"),
1586                ]),
1587            ])
1588        );
1589    }
1590
1591    #[test]
1592    fn test_read_fstring_empty() {
1593        let result = read(r#"f"""#).unwrap();
1594        assert_eq!(result, Value::list(vec![Value::symbol("str")]));
1595    }
1596
1597    #[test]
1598    fn test_read_fstring_only_expr() {
1599        let result = read(r#"f"${x}""#).unwrap();
1600        assert_eq!(
1601            result,
1602            Value::list(vec![Value::symbol("str"), Value::symbol("x")])
1603        );
1604    }
1605
1606    #[test]
1607    fn test_read_f_symbol_still_works() {
1608        // Plain 'f' symbol (not followed by '"') should still parse as symbol
1609        let result = read("f").unwrap();
1610        assert_eq!(result, Value::symbol("f"));
1611    }
1612
1613    #[test]
1614    fn test_read_f_prefixed_symbol_still_works() {
1615        // 'foo' should still parse as a normal symbol
1616        let result = read("foo").unwrap();
1617        assert_eq!(result, Value::symbol("foo"));
1618    }
1619
1620    // ── short lambda tests ──
1621
1622    #[test]
1623    fn test_read_short_lambda_single_arg() {
1624        // #(+ % 1) → (lambda (%1) (+ %1 1))
1625        let result = read("#(+ % 1)").unwrap();
1626        assert_eq!(
1627            result,
1628            Value::list(vec![
1629                Value::symbol("lambda"),
1630                Value::list(vec![Value::symbol("%1")]),
1631                Value::list(vec![Value::symbol("+"), Value::symbol("%1"), Value::int(1),]),
1632            ])
1633        );
1634    }
1635
1636    #[test]
1637    fn test_read_short_lambda_two_args() {
1638        // #(+ %1 %2) → (lambda (%1 %2) (+ %1 %2))
1639        let result = read("#(+ %1 %2)").unwrap();
1640        assert_eq!(
1641            result,
1642            Value::list(vec![
1643                Value::symbol("lambda"),
1644                Value::list(vec![Value::symbol("%1"), Value::symbol("%2")]),
1645                Value::list(vec![
1646                    Value::symbol("+"),
1647                    Value::symbol("%1"),
1648                    Value::symbol("%2"),
1649                ]),
1650            ])
1651        );
1652    }
1653
1654    #[test]
1655    fn test_read_short_lambda_bare_percent_is_percent1() {
1656        // #(* % %) → (lambda (%1) (* %1 %1))
1657        let result = read("#(* % %)").unwrap();
1658        assert_eq!(
1659            result,
1660            Value::list(vec![
1661                Value::symbol("lambda"),
1662                Value::list(vec![Value::symbol("%1")]),
1663                Value::list(vec![
1664                    Value::symbol("*"),
1665                    Value::symbol("%1"),
1666                    Value::symbol("%1"),
1667                ]),
1668            ])
1669        );
1670    }
1671
1672    #[test]
1673    fn test_read_short_lambda_no_args() {
1674        // #(println "hello") → (lambda () (println "hello"))
1675        let result = read(r#"#(println "hello")"#).unwrap();
1676        assert_eq!(
1677            result,
1678            Value::list(vec![
1679                Value::symbol("lambda"),
1680                Value::list(vec![]),
1681                Value::list(vec![Value::symbol("println"), Value::string("hello"),]),
1682            ])
1683        );
1684    }
1685
1686    #[test]
1687    fn test_read_short_lambda_in_list() {
1688        // (map #(+ % 1) numbers)
1689        let result = read("(map #(+ % 1) numbers)").unwrap();
1690        assert_eq!(
1691            result,
1692            Value::list(vec![
1693                Value::symbol("map"),
1694                Value::list(vec![
1695                    Value::symbol("lambda"),
1696                    Value::list(vec![Value::symbol("%1")]),
1697                    Value::list(vec![Value::symbol("+"), Value::symbol("%1"), Value::int(1),]),
1698                ]),
1699                Value::symbol("numbers"),
1700            ])
1701        );
1702    }
1703
1704    #[test]
1705    fn test_read_short_lambda_unterminated() {
1706        assert!(read("#(+ % 1").is_err());
1707    }
1708
1709    #[test]
1710    fn test_read_short_lambda_nested_expr() {
1711        // #(> (string-length %) 3) → (lambda (%1) (> (string-length %1) 3))
1712        let result = read("#(> (string-length %) 3)").unwrap();
1713        assert_eq!(
1714            result,
1715            Value::list(vec![
1716                Value::symbol("lambda"),
1717                Value::list(vec![Value::symbol("%1")]),
1718                Value::list(vec![
1719                    Value::symbol(">"),
1720                    Value::list(vec![Value::symbol("string-length"), Value::symbol("%1"),]),
1721                    Value::int(3),
1722                ]),
1723            ])
1724        );
1725    }
1726
1727    #[test]
1728    fn test_read_regex_literal_digits() {
1729        let result = read(r#"#"\d+""#).unwrap();
1730        assert_eq!(result, Value::string(r"\d+"));
1731    }
1732
1733    #[test]
1734    fn test_read_regex_literal_char_class() {
1735        let result = read(r#"#"[a-z]+""#).unwrap();
1736        assert_eq!(result, Value::string("[a-z]+"));
1737    }
1738
1739    #[test]
1740    fn test_read_regex_literal_backslashes_literal() {
1741        let result = read(r#"#"hello\.world""#).unwrap();
1742        assert_eq!(result, Value::string(r"hello\.world"));
1743    }
1744
1745    #[test]
1746    fn test_read_regex_literal_escaped_quote() {
1747        let result = read(r#"#"foo\"bar""#).unwrap();
1748        assert_eq!(result, Value::string(r#"foo"bar"#));
1749    }
1750
1751    #[test]
1752    fn test_read_regex_literal_unterminated() {
1753        assert!(read(r#"#"abc"#).is_err());
1754    }
1755
1756    #[test]
1757    fn test_mismatched_paren_bracket() {
1758        let err = read("(list [1 2 3)").unwrap_err();
1759        let msg = err.to_string();
1760        assert!(
1761            msg.contains("mismatched"),
1762            "expected mismatched error, got: {msg}"
1763        );
1764    }
1765
1766    #[test]
1767    fn test_mismatched_bracket_paren() {
1768        let err = read("[1 2 3)").unwrap_err();
1769        let msg = err.to_string();
1770        assert!(
1771            msg.contains("mismatched"),
1772            "expected mismatched error, got: {msg}"
1773        );
1774    }
1775
1776    #[test]
1777    fn test_mismatched_paren_brace() {
1778        let err = read("(+ 1 2}").unwrap_err();
1779        let msg = err.to_string();
1780        assert!(
1781            msg.contains("mismatched"),
1782            "expected mismatched error, got: {msg}"
1783        );
1784    }
1785
1786    #[test]
1787    fn test_mismatched_brace_paren() {
1788        let err = read("{:a 1)").unwrap_err();
1789        let msg = err.to_string();
1790        assert!(
1791            msg.contains("mismatched"),
1792            "expected mismatched error, got: {msg}"
1793        );
1794    }
1795
1796    #[test]
1797    fn test_mismatched_brace_bracket() {
1798        let err = read("{:a 1]").unwrap_err();
1799        let msg = err.to_string();
1800        assert!(
1801            msg.contains("mismatched"),
1802            "expected mismatched error, got: {msg}"
1803        );
1804    }
1805
1806    #[test]
1807    fn test_mismatched_bracket_brace() {
1808        let err = read("[1 2}").unwrap_err();
1809        let msg = err.to_string();
1810        assert!(
1811            msg.contains("mismatched"),
1812            "expected mismatched error, got: {msg}"
1813        );
1814    }
1815
1816    #[test]
1817    fn test_correct_brackets_still_work() {
1818        assert!(read("(list [1 2 3])").is_ok());
1819        assert!(read("{:a 1}").is_ok());
1820        assert!(read("[1 [2 3] 4]").is_ok());
1821    }
1822
1823    #[test]
1824    fn test_auto_gensym_symbol_parsing() {
1825        let val = read("v#").unwrap();
1826        assert_eq!(val.as_symbol().unwrap(), "v#");
1827
1828        let val = read("tmp#").unwrap();
1829        assert_eq!(val.as_symbol().unwrap(), "tmp#");
1830
1831        let val = read("`(let ((v# 1)) v#)").unwrap();
1832        let items = val.as_list().unwrap();
1833        assert_eq!(items[0].as_symbol().unwrap(), "quasiquote");
1834    }
1835
1836    #[test]
1837    fn test_hash_reader_dispatch_still_works() {
1838        let val = read("#t").unwrap();
1839        assert_eq!(val.as_bool(), Some(true));
1840
1841        let val = read("#f").unwrap();
1842        assert_eq!(val.as_bool(), Some(false));
1843
1844        let val = read("#\\space").unwrap();
1845        assert_eq!(val.as_char(), Some(' '));
1846
1847        let val = read("#(+ % 1)").unwrap();
1848        assert!(val.as_list().is_some());
1849    }
1850
1851    #[test]
1852    fn test_auto_gensym_edge_cases() {
1853        let val = read("x##").unwrap();
1854        assert_eq!(val.as_symbol().unwrap(), "x##");
1855
1856        let val = read(":foo").unwrap();
1857        assert!(val.as_keyword().is_some());
1858    }
1859
1860    // ── Error recovery tests ─────────────────────────────────────
1861
1862    #[test]
1863    fn recover_valid_input_no_errors() {
1864        let (exprs, _, _, errors) = read_many_with_spans_recover("(+ 1 2) (- 3 4)");
1865        assert!(errors.is_empty());
1866        assert_eq!(exprs.len(), 2);
1867    }
1868
1869    #[test]
1870    fn recover_stray_closer_then_valid() {
1871        // Stray `)` then a valid form
1872        let (exprs, _, _, errors) = read_many_with_spans_recover(") (+ 1 2)");
1873        assert_eq!(errors.len(), 1);
1874        assert_eq!(exprs.len(), 1);
1875    }
1876
1877    #[test]
1878    fn recover_unclosed_then_valid() {
1879        // Unclosed list, then a valid form on the next line
1880        let (_exprs, _, _, errors) = read_many_with_spans_recover("(define x\n(+ 1 2)");
1881        // The first `(define x` consumes tokens including `(+ 1 2)` as part of
1882        // its unterminated body, then hits EOF → 1 error, the (+ 1 2) is inside it
1883        assert_eq!(errors.len(), 1);
1884        // The second form got consumed by the unterminated first form
1885        // so recovery can't salvage it — this is expected
1886    }
1887
1888    #[test]
1889    fn recover_multiple_stray_closers() {
1890        let (exprs, _, _, errors) = read_many_with_spans_recover(") ] } (define x 1)");
1891        assert_eq!(errors.len(), 3);
1892        assert_eq!(exprs.len(), 1);
1893        assert!(exprs[0].as_list().is_some());
1894    }
1895
1896    #[test]
1897    fn recover_mismatched_bracket() {
1898        // Mismatched bracket: ( closed with ]
1899        let (exprs, _, _, errors) = read_many_with_spans_recover("(define x] (+ 1 2)");
1900        assert!(!errors.is_empty());
1901        // After the mismatch error, recovery should find `(+ 1 2)`
1902        assert!(!exprs.is_empty());
1903    }
1904
1905    #[test]
1906    fn recover_empty_input() {
1907        let (exprs, _, _, errors) = read_many_with_spans_recover("");
1908        assert!(errors.is_empty());
1909        assert!(exprs.is_empty());
1910    }
1911
1912    #[test]
1913    fn recover_only_errors() {
1914        let (exprs, _, _, errors) = read_many_with_spans_recover(") )");
1915        assert_eq!(errors.len(), 2);
1916        assert!(exprs.is_empty());
1917    }
1918
1919    #[test]
1920    fn recover_valid_between_errors() {
1921        // error, valid, error
1922        let (exprs, _, _, errors) = read_many_with_spans_recover(") (+ 1 2) )");
1923        assert_eq!(errors.len(), 2);
1924        assert_eq!(exprs.len(), 1);
1925    }
1926
1927    // ── symbol span tracking ──
1928
1929    #[test]
1930    fn test_symbol_spans_basic() {
1931        let (_, _, sym_spans) = read_many_with_symbol_spans("(define x 42)").unwrap();
1932        // Should record "define" and "x" (not 42 — it's an int, not a symbol)
1933        let names: Vec<&str> = sym_spans.iter().map(|(n, _)| n.as_str()).collect();
1934        assert!(names.contains(&"define"), "missing define in {:?}", names);
1935        assert!(names.contains(&"x"), "missing x in {:?}", names);
1936        assert_eq!(names.len(), 2);
1937    }
1938
1939    #[test]
1940    fn test_symbol_spans_positions() {
1941        let (_, _, sym_spans) = read_many_with_symbol_spans("(defun foo (x) x)").unwrap();
1942        // "foo" should have a precise span
1943        let foo = sym_spans.iter().find(|(n, _)| n == "foo").unwrap();
1944        assert_eq!(foo.1.line, 1);
1945        assert_eq!(foo.1.col, 8); // 1-indexed: "(defun " = 7 chars, foo starts at col 8
1946    }
1947
1948    #[test]
1949    fn test_symbol_spans_no_synthetic() {
1950        // '(a b) desugars to (quote (a b)) — "quote" should NOT appear in symbol_spans
1951        let (_, _, sym_spans) = read_many_with_symbol_spans("'(a b)").unwrap();
1952        let names: Vec<&str> = sym_spans.iter().map(|(n, _)| n.as_str()).collect();
1953        assert!(
1954            !names.contains(&"quote"),
1955            "synthetic 'quote' should not be in symbol_spans"
1956        );
1957        assert!(names.contains(&"a"));
1958        assert!(names.contains(&"b"));
1959    }
1960
1961    #[test]
1962    fn test_symbol_spans_multiple_forms() {
1963        let (_, _, sym_spans) =
1964            read_many_with_symbol_spans("(define x 1)\n(defun f (a) a)").unwrap();
1965        let names: Vec<&str> = sym_spans.iter().map(|(n, _)| n.as_str()).collect();
1966        assert!(names.contains(&"define"));
1967        assert!(names.contains(&"x"));
1968        assert!(names.contains(&"defun"));
1969        assert!(names.contains(&"f"));
1970        assert!(names.contains(&"a"));
1971        // "a" should appear twice (param + body reference)
1972        assert_eq!(names.iter().filter(|&&n| n == "a").count(), 2);
1973    }
1974
1975    #[test]
1976    fn test_symbol_spans_nil_excluded() {
1977        // "nil" parses as Value::nil(), not a symbol — should not be in symbol_spans
1978        let (_, _, sym_spans) = read_many_with_symbol_spans("nil").unwrap();
1979        assert!(sym_spans.is_empty());
1980    }
1981}