Skip to main content

sema_reader/
reader.rs

1use std::collections::BTreeMap;
2use std::rc::Rc;
3
4use sema_core::{resolve, SemaError, Span, SpanMap, Value, ValueView};
5
6use crate::lexer::{tokenize, FStringPart, SpannedToken, Token};
7
/// Maximum nesting depth for parsing. Untrusted input (files, the WASM
/// playground, f-string interpolations) must not be able to overflow the thread
/// stack via thousands of nested forms. 1024 is far beyond any real program.
///
/// The limit is checked in `Parser::parse_expr`, which counts one level for
/// every `parse_expr` call that is currently active.
const MAX_PARSE_DEPTH: usize = 1024;
12
/// Recursive-descent parser over an already-lexed token stream.
struct Parser {
    /// The complete token stream, including comment and newline trivia.
    tokens: Vec<SpannedToken>,
    /// Index into `tokens` of the next token to be consumed.
    pos: usize,
    /// Source spans of parsed lists and vectors, keyed by the address of the
    /// `Rc` that backs each one.
    span_map: SpanMap,
    /// Name and span of each symbol atom parsed, in the order encountered.
    symbol_spans: Vec<(String, Span)>,
    /// Number of `parse_expr` calls currently active; bounded by
    /// `MAX_PARSE_DEPTH`.
    depth: usize,
}
20
21impl Parser {
22    fn new(tokens: Vec<SpannedToken>) -> Self {
23        Parser {
24            tokens,
25            pos: 0,
26            span_map: SpanMap::new(),
27            symbol_spans: Vec::new(),
28            depth: 0,
29        }
30    }
31
32    fn peek(&self) -> Option<&Token> {
33        let mut pos = self.pos;
34        while let Some(t) = self.tokens.get(pos) {
35            match &t.token {
36                Token::Comment(_) | Token::Newline => pos += 1,
37                _ => return Some(&t.token),
38            }
39        }
40        None
41    }
42
43    fn span(&self) -> Span {
44        let mut pos = self.pos;
45        while let Some(t) = self.tokens.get(pos) {
46            match &t.token {
47                Token::Comment(_) | Token::Newline => pos += 1,
48                _ => return t.span,
49            }
50        }
51        Span::point(0, 0)
52    }
53
54    fn skip_trivia(&mut self) {
55        while let Some(t) = self.tokens.get(self.pos) {
56            match &t.token {
57                Token::Comment(_) | Token::Newline => self.pos += 1,
58                _ => break,
59            }
60        }
61    }
62
63    fn advance(&mut self) -> Option<&SpannedToken> {
64        self.skip_trivia();
65        let tok = self.tokens.get(self.pos);
66        if tok.is_some() {
67            self.pos += 1;
68        }
69        tok
70    }
71
72    fn expect(&mut self, expected: &Token) -> Result<(), SemaError> {
73        let span = self.span();
74        match self.advance() {
75            Some(t) if &t.token == expected => Ok(()),
76            Some(t) => Err(SemaError::Reader {
77                message: format!(
78                    "expected `{}`, got `{}`",
79                    token_display(expected),
80                    token_display(&t.token)
81                ),
82                span,
83            }),
84            None => Err(SemaError::Reader {
85                message: format!("expected `{}`, got end of input", token_display(expected)),
86                span,
87            }),
88        }
89    }
90
91    fn parse_expr(&mut self) -> Result<Value, SemaError> {
92        // Bound recursion depth on the single common entry point: every nested
93        // form (list/vector/map/short-lambda elements) recurses through here.
94        self.depth += 1;
95        if self.depth > MAX_PARSE_DEPTH {
96            self.depth -= 1;
97            return Err(SemaError::Reader {
98                message: format!("input nested too deeply (limit {MAX_PARSE_DEPTH})"),
99                span: self.span(),
100            }
101            .with_hint("reduce nesting depth"));
102        }
103        let result = self.parse_expr_inner();
104        self.depth -= 1;
105        result
106    }
107
108    fn parse_expr_inner(&mut self) -> Result<Value, SemaError> {
109        let span = self.span();
110        match self.peek() {
111            None => Err(SemaError::Reader {
112                message: "unexpected end of input".to_string(),
113                span,
114            }),
115            Some(Token::LParen) => self.parse_list(),
116            Some(Token::LBracket) => self.parse_vector(),
117            Some(Token::LBrace) => self.parse_map(),
118            Some(Token::Quote) => {
119                self.advance();
120                let inner = self.parse_expr().map_err(|_| {
121                    SemaError::Reader {
122                        message: "quote (') requires an expression after it".to_string(),
123                        span,
124                    }
125                    .with_hint("e.g. '(1 2 3) or 'foo")
126                })?;
127                self.make_list_with_span(vec![Value::symbol("quote"), inner], span)
128            }
129            Some(Token::Quasiquote) => {
130                self.advance();
131                let inner = self.parse_expr().map_err(|_| {
132                    SemaError::Reader {
133                        message: "quasiquote (`) requires an expression after it".to_string(),
134                        span,
135                    }
136                    .with_hint("e.g. `(list ,x)")
137                })?;
138                self.make_list_with_span(vec![Value::symbol("quasiquote"), inner], span)
139            }
140            Some(Token::Unquote) => {
141                self.advance();
142                let inner = self.parse_expr().map_err(|_| {
143                    SemaError::Reader {
144                        message: "unquote (,) requires an expression after it".to_string(),
145                        span,
146                    }
147                    .with_hint("use inside quasiquote, e.g. `(list ,x)")
148                })?;
149                self.make_list_with_span(vec![Value::symbol("unquote"), inner], span)
150            }
151            Some(Token::UnquoteSplice) => {
152                self.advance();
153                let inner = self.parse_expr().map_err(|_| {
154                    SemaError::Reader {
155                        message: "unquote-splicing (,@) requires an expression after it"
156                            .to_string(),
157                        span,
158                    }
159                    .with_hint("use inside quasiquote, e.g. `(list ,@xs)")
160                })?;
161                self.make_list_with_span(vec![Value::symbol("unquote-splicing"), inner], span)
162            }
163            Some(Token::BytevectorStart) => self.parse_bytevector(),
164            Some(Token::ShortLambdaStart) => self.parse_short_lambda(),
165            Some(_) => {
166                let val = self.parse_atom()?;
167                if let Some(name) = val.as_symbol() {
168                    self.symbol_spans.push((name, span));
169                }
170                Ok(val)
171            }
172        }
173    }
174
175    fn make_list_with_span(&mut self, items: Vec<Value>, span: Span) -> Result<Value, SemaError> {
176        let rc = Rc::new(items);
177        let ptr = Rc::as_ptr(&rc) as usize;
178        self.span_map.insert(ptr, span);
179        Ok(Value::list_from_rc(rc))
180    }
181
182    /// Get the span of the previously consumed token (the one at pos-1).
183    fn prev_span(&self) -> Span {
184        if self.pos > 0 {
185            self.tokens[self.pos - 1].span
186        } else {
187            Span::point(0, 0)
188        }
189    }
190
191    fn parse_list(&mut self) -> Result<Value, SemaError> {
192        let open_span = self.span();
193        self.expect(&Token::LParen)?;
194        let mut items = Vec::new();
195        while self.peek() != Some(&Token::RParen) {
196            if self.peek().is_none() {
197                return Err(SemaError::Reader {
198                    message: "unterminated list".to_string(),
199                    span: open_span,
200                }
201                .with_hint("add a closing `)`"));
202            }
203            if self.peek() == Some(&Token::RBracket) {
204                return Err(SemaError::Reader {
205                    message: "mismatched bracket: expected `)` to close `(`, found `]`".to_string(),
206                    span: self.span(),
207                }
208                .with_hint("this list was opened with `(` — close it with `)`"));
209            }
210            if self.peek() == Some(&Token::RBrace) {
211                return Err(SemaError::Reader {
212                    message: "mismatched bracket: expected `)` to close `(`, found `}`".to_string(),
213                    span: self.span(),
214                }
215                .with_hint("this list was opened with `(` — close it with `)`"));
216            }
217            // Handle dotted pairs: (a . b)
218            if self.peek() == Some(&Token::Dot) {
219                self.advance(); // skip dot
220                let cdr = self.parse_expr()?;
221                self.expect(&Token::RParen)?;
222                let close = self.prev_span();
223                items.push(Value::symbol("."));
224                items.push(cdr);
225                return self.make_list_with_span(items, open_span.to(&close));
226            }
227            items.push(self.parse_expr()?);
228        }
229        self.expect(&Token::RParen)?;
230        let close = self.prev_span();
231        self.make_list_with_span(items, open_span.to(&close))
232    }
233
234    fn parse_vector(&mut self) -> Result<Value, SemaError> {
235        let open_span = self.span();
236        self.expect(&Token::LBracket)?;
237        let mut items = Vec::new();
238        while self.peek() != Some(&Token::RBracket) {
239            if self.peek().is_none() {
240                return Err(SemaError::Reader {
241                    message: "unterminated vector".to_string(),
242                    span: open_span,
243                }
244                .with_hint("add a closing `]`"));
245            }
246            if self.peek() == Some(&Token::RParen) {
247                return Err(SemaError::Reader {
248                    message: "mismatched bracket: expected `]` to close `[`, found `)`".to_string(),
249                    span: self.span(),
250                }
251                .with_hint("this vector was opened with `[` — close it with `]`"));
252            }
253            if self.peek() == Some(&Token::RBrace) {
254                return Err(SemaError::Reader {
255                    message: "mismatched bracket: expected `]` to close `[`, found `}`".to_string(),
256                    span: self.span(),
257                }
258                .with_hint("this vector was opened with `[` — close it with `]`"));
259            }
260            items.push(self.parse_expr()?);
261        }
262        self.expect(&Token::RBracket)?;
263        let close = self.prev_span();
264        let rc = Rc::new(items);
265        let ptr = Rc::as_ptr(&rc) as usize;
266        self.span_map.insert(ptr, open_span.to(&close));
267        Ok(Value::vector_from_rc(rc))
268    }
269
270    fn parse_map(&mut self) -> Result<Value, SemaError> {
271        let open_span = self.span();
272        self.expect(&Token::LBrace)?;
273        let mut map = BTreeMap::new();
274        while self.peek() != Some(&Token::RBrace) {
275            if self.peek().is_none() {
276                return Err(SemaError::Reader {
277                    message: "unterminated map".to_string(),
278                    span: open_span,
279                }
280                .with_hint("add a closing `}`"));
281            }
282            if self.peek() == Some(&Token::RParen) {
283                return Err(SemaError::Reader {
284                    message: "mismatched bracket: expected `}` to close `{`, found `)`".to_string(),
285                    span: self.span(),
286                }
287                .with_hint("this map was opened with `{` — close it with `}`"));
288            }
289            if self.peek() == Some(&Token::RBracket) {
290                return Err(SemaError::Reader {
291                    message: "mismatched bracket: expected `}` to close `{`, found `]`".to_string(),
292                    span: self.span(),
293                }
294                .with_hint("this map was opened with `{` — close it with `}`"));
295            }
296            let key = self.parse_expr()?;
297            if self.peek() == Some(&Token::RBrace) || self.peek().is_none() {
298                return Err(SemaError::Reader {
299                    message: "map literal must have even number of forms".to_string(),
300                    span: self.span(),
301                });
302            }
303            let val = self.parse_expr()?;
304            map.insert(key, val);
305        }
306        self.expect(&Token::RBrace)?;
307        Ok(Value::map(map))
308    }
309
310    fn parse_bytevector(&mut self) -> Result<Value, SemaError> {
311        let open_span = self.span();
312        self.advance(); // consume BytevectorStart token
313        let mut bytes = Vec::new();
314        while self.peek() != Some(&Token::RParen) {
315            if self.peek().is_none() {
316                return Err(SemaError::Reader {
317                    message: "unterminated bytevector".to_string(),
318                    span: open_span,
319                }
320                .with_hint("add a closing `)`"));
321            }
322            let span = self.span();
323            match self.peek() {
324                Some(Token::Int(n)) => {
325                    let n = *n;
326                    self.advance();
327                    if !(0..=255).contains(&n) {
328                        return Err(SemaError::Reader {
329                            message: format!("#u8(...): byte value {n} out of range 0..255"),
330                            span,
331                        });
332                    }
333                    bytes.push(n as u8);
334                }
335                _ => {
336                    return Err(SemaError::Reader {
337                        message: "#u8(...): expected integer byte value".to_string(),
338                        span,
339                    });
340                }
341            }
342        }
343        self.expect(&Token::RParen)?;
344        Ok(Value::bytevector(bytes))
345    }
346
347    fn parse_short_lambda(&mut self) -> Result<Value, SemaError> {
348        let open_span = self.span();
349        self.advance(); // consume ShortLambdaStart
350        let mut body_items = Vec::new();
351        while self.peek() != Some(&Token::RParen) {
352            if self.peek().is_none() {
353                return Err(SemaError::Reader {
354                    message: "unterminated short lambda #(...)".to_string(),
355                    span: open_span,
356                }
357                .with_hint("add a closing `)`"));
358            }
359            body_items.push(self.parse_expr()?);
360        }
361        self.expect(&Token::RParen)?;
362
363        // Build the body as a single list form: (fn-name arg1 arg2 ...)
364        let body = Value::list(body_items);
365
366        // Scan body for % / %1 / %2 etc., rewrite % → %1
367        let mut max_arg: usize = 0;
368        let body = rewrite_percent_args(&body, &mut max_arg);
369
370        // Build parameter list
371        let params: Vec<Value> = if max_arg == 0 {
372            vec![]
373        } else {
374            (1..=max_arg)
375                .map(|n| Value::symbol(&format!("%{}", n)))
376                .collect()
377        };
378
379        Ok(Value::list(vec![
380            Value::symbol("lambda"),
381            Value::list(params),
382            body,
383        ]))
384    }
385
386    /// After a parse error, skip tokens until we reach a position that
387    /// could plausibly start a new top-level expression (depth-0 open bracket,
388    /// quote, or atom). This enables error recovery in `read_many_recover`.
389    fn recover_to_next_expr(&mut self) {
390        let mut depth: usize = 0;
391        while let Some(tok) = self.peek() {
392            match tok {
393                // Opening brackets increase depth
394                Token::LParen
395                | Token::LBracket
396                | Token::LBrace
397                | Token::ShortLambdaStart
398                | Token::BytevectorStart => {
399                    if depth == 0 {
400                        // This could start a new top-level form — stop here
401                        return;
402                    }
403                    self.advance();
404                    depth += 1;
405                }
406                // Closing brackets decrease depth
407                Token::RParen | Token::RBracket | Token::RBrace => {
408                    if depth == 0 {
409                        // Stray closer at top level — stop and let parse_expr report it
410                        return;
411                    }
412                    self.advance();
413                    depth -= 1;
414                }
415                // Quote-like prefixes at depth 0 could start a new form
416                Token::Quote | Token::Quasiquote | Token::Unquote | Token::UnquoteSplice => {
417                    if depth == 0 {
418                        return;
419                    }
420                    self.advance();
421                }
422                // Atoms at depth 0 could be a top-level expression
423                _ => {
424                    if depth == 0 {
425                        return;
426                    }
427                    self.advance();
428                }
429            }
430        }
431    }
432
433    fn parse_atom(&mut self) -> Result<Value, SemaError> {
434        let span = self.span();
435        match self.advance() {
436            Some(SpannedToken {
437                token: Token::Int(n),
438                ..
439            }) => Ok(Value::int(*n)),
440            Some(SpannedToken {
441                token: Token::Float(f),
442                ..
443            }) => Ok(Value::float(*f)),
444            Some(SpannedToken {
445                token: Token::String(s),
446                ..
447            }) => Ok(Value::string(s)),
448            Some(SpannedToken {
449                token: Token::Regex(s),
450                ..
451            }) => Ok(Value::string(s)),
452            Some(SpannedToken {
453                token: Token::Symbol(s),
454                ..
455            }) => {
456                if s == "nil" {
457                    Ok(Value::nil())
458                } else {
459                    Ok(Value::symbol(s))
460                }
461            }
462            Some(SpannedToken {
463                token: Token::Keyword(s),
464                ..
465            }) => Ok(Value::keyword(s)),
466            Some(SpannedToken {
467                token: Token::Bool(b),
468                ..
469            }) => Ok(Value::bool(*b)),
470            Some(SpannedToken {
471                token: Token::Char(c),
472                ..
473            }) => Ok(Value::char(*c)),
474            Some(SpannedToken {
475                token: Token::FString(parts),
476                ..
477            }) => {
478                let parts = parts.clone();
479                let mut items = vec![Value::symbol("str")];
480                for part in &parts {
481                    match part {
482                        FStringPart::Literal(s) => {
483                            if !s.is_empty() {
484                                items.push(Value::string(s));
485                            }
486                        }
487                        FStringPart::Expr(src) => {
488                            let val = read(src)?;
489                            items.push(val);
490                        }
491                    }
492                }
493                Ok(Value::list(items))
494            }
495            Some(t) => {
496                let (name, hint) = match &t.token {
497                    Token::RParen => (
498                        "unexpected closing `)`",
499                        Some("no matching opening parenthesis"),
500                    ),
501                    Token::RBracket => (
502                        "unexpected closing `]`",
503                        Some("no matching opening bracket"),
504                    ),
505                    Token::RBrace => ("unexpected closing `}`", Some("no matching opening brace")),
506                    Token::Dot => (
507                        "unexpected `.`",
508                        Some("dots are used in pair notation, e.g. (a . b)"),
509                    ),
510                    _ => ("unexpected token", None),
511                };
512                let err = SemaError::Reader {
513                    message: name.to_string(),
514                    span,
515                };
516                Err(if let Some(h) = hint {
517                    err.with_hint(h)
518                } else {
519                    err
520                })
521            }
522            None => Err(SemaError::Reader {
523                message: "unexpected end of input".to_string(),
524                span,
525            }),
526        }
527    }
528}
529
530fn token_display(tok: &Token) -> &'static str {
531    match tok {
532        Token::LParen => "(",
533        Token::RParen => ")",
534        Token::LBracket => "[",
535        Token::RBracket => "]",
536        Token::LBrace => "{",
537        Token::RBrace => "}",
538        Token::Quote => "'",
539        Token::Quasiquote => "`",
540        Token::Unquote => ",",
541        Token::UnquoteSplice => ",@",
542        Token::Dot => ".",
543        Token::BytevectorStart => "#u8(",
544        Token::Int(_) => "integer",
545        Token::Float(_) => "float",
546        Token::String(_) => "string",
547        Token::Symbol(_) => "symbol",
548        Token::Keyword(_) => "keyword",
549        Token::Bool(_) => "boolean",
550        Token::Char(_) => "character",
551        Token::FString(_) => "f-string",
552        Token::ShortLambdaStart => "#(",
553        Token::Comment(_) => "comment",
554        Token::Newline => "newline",
555        Token::Regex(_) => "regex",
556    }
557}
558
559/// Recursively scan a Value AST for `%`, `%1`, `%2`, etc. symbols.
560/// Rewrites bare `%` to `%1`. Tracks the highest numbered arg in `max_arg`.
561/// Skips recursion into nested `(lambda ...)` / `(fn ...)` forms.
562fn rewrite_percent_args(expr: &Value, max_arg: &mut usize) -> Value {
563    match expr.view() {
564        ValueView::Symbol(spur) => {
565            let name = resolve(spur);
566            if name == "%" {
567                *max_arg = (*max_arg).max(1);
568                Value::symbol("%1")
569            } else if let Some(rest) = name.strip_prefix('%') {
570                if let Ok(n) = rest.parse::<usize>() {
571                    if n > 0 {
572                        *max_arg = (*max_arg).max(n);
573                    }
574                }
575                expr.clone()
576            } else {
577                expr.clone()
578            }
579        }
580        ValueView::List(items) => {
581            // Skip nested (lambda ...) / (fn ...) forms — their % args are their own
582            if let Some(first) = items.first() {
583                if let ValueView::Symbol(s) = first.view() {
584                    let name = resolve(s);
585                    if name == "lambda" || name == "fn" {
586                        return expr.clone();
587                    }
588                }
589            }
590            let new_items: Vec<Value> = items
591                .iter()
592                .map(|item| rewrite_percent_args(item, max_arg))
593                .collect();
594            Value::list(new_items)
595        }
596        ValueView::Vector(items) => {
597            let new_items: Vec<Value> = items
598                .iter()
599                .map(|item| rewrite_percent_args(item, max_arg))
600                .collect();
601            Value::vector(new_items)
602        }
603        _ => expr.clone(),
604    }
605}
606
607/// Read a single s-expression from a string.
608pub fn read(input: &str) -> Result<Value, SemaError> {
609    let tokens = tokenize(input)?;
610    let mut parser = Parser::new(tokens);
611    if parser.peek().is_none() {
612        return Ok(Value::nil());
613    }
614    parser.parse_expr()
615}
616
617/// Read all s-expressions from a string.
618pub fn read_many(input: &str) -> Result<Vec<Value>, SemaError> {
619    let tokens = tokenize(input)?;
620    let mut parser = Parser::new(tokens);
621    let mut exprs = Vec::new();
622    while parser.peek().is_some() {
623        exprs.push(parser.parse_expr()?);
624    }
625    Ok(exprs)
626}
627
628/// Read all s-expressions and return the accumulated span map.
629pub fn read_many_with_spans(input: &str) -> Result<(Vec<Value>, SpanMap), SemaError> {
630    let tokens = tokenize(input)?;
631    let mut parser = Parser::new(tokens);
632    let mut exprs = Vec::new();
633    while parser.peek().is_some() {
634        exprs.push(parser.parse_expr()?);
635    }
636    Ok((exprs, parser.span_map))
637}
638
639/// Read all s-expressions and return spans for both compound expressions and individual symbols.
640/// Symbol spans enable precise go-to-definition (jumping to the name, not the whole form).
641#[allow(clippy::type_complexity)]
642pub fn read_many_with_symbol_spans(
643    input: &str,
644) -> Result<(Vec<Value>, SpanMap, Vec<(String, Span)>), SemaError> {
645    let tokens = tokenize(input)?;
646    let mut parser = Parser::new(tokens);
647    let mut exprs = Vec::new();
648    while parser.peek().is_some() {
649        exprs.push(parser.parse_expr()?);
650    }
651    Ok((exprs, parser.span_map, parser.symbol_spans))
652}
653
654/// Read all s-expressions with error recovery.
655/// On parse errors, skips to the next top-level form and continues.
656/// Returns (successfully parsed forms, span map, collected errors).
657/// Tokenizer errors are returned as a single error with no parsed forms.
658#[allow(clippy::type_complexity)]
659pub fn read_many_with_spans_recover(
660    input: &str,
661) -> (Vec<Value>, SpanMap, Vec<(String, Span)>, Vec<SemaError>) {
662    let tokens = match tokenize(input) {
663        Ok(t) => t,
664        Err(e) => return (vec![], SpanMap::new(), vec![], vec![e]),
665    };
666    let mut parser = Parser::new(tokens);
667    let mut exprs = Vec::new();
668    let mut errors = Vec::new();
669    while parser.peek().is_some() {
670        match parser.parse_expr() {
671            Ok(expr) => exprs.push(expr),
672            Err(err) => {
673                errors.push(err);
674                parser.recover_to_next_expr();
675            }
676        }
677    }
678    (exprs, parser.span_map, parser.symbol_spans, errors)
679}
680
681#[cfg(test)]
682#[allow(clippy::approx_constant)]
683mod tests {
684    use super::*;
685
686    #[test]
687    fn test_read_int() {
688        assert_eq!(read("42").unwrap(), Value::int(42));
689    }
690
691    #[test]
692    fn deeply_nested_input_errors_instead_of_overflowing() {
693        // Untrusted input with thousands of levels of nesting must return a
694        // reader error rather than recurse to a stack overflow. Run on a large
695        // stack so the result reflects the depth-limit check, not the small
696        // default test-thread stack (which would SIGSEGV either way).
697        let result = std::thread::Builder::new()
698            .stack_size(16 * 1024 * 1024)
699            .spawn(|| {
700                let depth = 3000;
701                let src = format!("{}{}", "[".repeat(depth), "]".repeat(depth));
702                read(&src).is_err()
703            })
704            .unwrap()
705            .join()
706            .expect("parser must not overflow the stack on deeply nested input");
707        assert!(
708            result,
709            "expected a depth-limit error for deeply nested input"
710        );
711    }
712
713    #[test]
714    fn test_read_negative_int() {
715        assert_eq!(read("-7").unwrap(), Value::int(-7));
716    }
717
718    #[test]
719    fn test_read_float() {
720        assert_eq!(read("3.14").unwrap(), Value::float(3.14));
721    }
722
723    #[test]
724    fn test_read_string() {
725        assert_eq!(read("\"hello\"").unwrap(), Value::string("hello"));
726    }
727
728    #[test]
729    fn test_read_symbol() {
730        assert_eq!(read("foo").unwrap(), Value::symbol("foo"));
731    }
732
733    #[test]
734    fn test_read_keyword() {
735        assert_eq!(read(":bar").unwrap(), Value::keyword("bar"));
736    }
737
738    #[test]
739    fn test_read_bool() {
740        assert_eq!(read("#t").unwrap(), Value::bool(true));
741        assert_eq!(read("#f").unwrap(), Value::bool(false));
742    }
743
744    #[test]
745    fn test_read_list() {
746        let result = read("(+ 1 2)").unwrap();
747        assert_eq!(
748            result,
749            Value::list(vec![Value::symbol("+"), Value::int(1), Value::int(2)])
750        );
751    }
752
753    #[test]
754    fn test_read_nested_list() {
755        let result = read("(* (+ 1 2) 3)").unwrap();
756        assert_eq!(
757            result,
758            Value::list(vec![
759                Value::symbol("*"),
760                Value::list(vec![Value::symbol("+"), Value::int(1), Value::int(2)]),
761                Value::int(3)
762            ])
763        );
764    }
765
766    #[test]
767    fn test_read_vector() {
768        let result = read("[1 2 3]").unwrap();
769        assert_eq!(
770            result,
771            Value::vector(vec![Value::int(1), Value::int(2), Value::int(3)])
772        );
773    }
774
775    #[test]
776    fn test_read_map() {
777        let result = read("{:a 1 :b 2}").unwrap();
778        let mut expected = BTreeMap::new();
779        expected.insert(Value::keyword("a"), Value::int(1));
780        expected.insert(Value::keyword("b"), Value::int(2));
781        assert_eq!(result, Value::map(expected));
782    }
783
784    #[test]
785    fn test_read_quote() {
786        let result = read("'foo").unwrap();
787        assert_eq!(
788            result,
789            Value::list(vec![Value::symbol("quote"), Value::symbol("foo")])
790        );
791    }
792
793    #[test]
794    fn test_read_quasiquote() {
795        let result = read("`(a ,b ,@c)").unwrap();
796        assert_eq!(
797            result,
798            Value::list(vec![
799                Value::symbol("quasiquote"),
800                Value::list(vec![
801                    Value::symbol("a"),
802                    Value::list(vec![Value::symbol("unquote"), Value::symbol("b")]),
803                    Value::list(vec![Value::symbol("unquote-splicing"), Value::symbol("c")]),
804                ])
805            ])
806        );
807    }
808
809    #[test]
810    fn test_read_nil() {
811        assert_eq!(read("nil").unwrap(), Value::nil());
812    }
813
814    #[test]
815    fn test_read_many_exprs() {
816        let results = read_many("1 2 3").unwrap();
817        assert_eq!(results, vec![Value::int(1), Value::int(2), Value::int(3)]);
818    }
819
820    #[test]
821    fn test_comments() {
822        let result = read_many("; comment\n(+ 1 2)").unwrap();
823        assert_eq!(result.len(), 1);
824    }
825
826    #[test]
827    fn test_read_zero() {
828        assert_eq!(read("0").unwrap(), Value::int(0));
829    }
830
831    #[test]
832    fn test_read_negative_zero() {
833        assert_eq!(read("-0").unwrap(), Value::int(0));
834    }
835
836    #[test]
837    fn test_read_leading_zeros() {
838        assert_eq!(read("007").unwrap(), Value::int(7));
839    }
840
841    #[test]
842    fn test_read_large_int() {
843        assert_eq!(read("9999999999999").unwrap(), Value::int(9999999999999));
844    }
845
846    #[test]
847    fn test_read_int_overflow() {
848        // i64::MAX + 1 should error, not silently wrap
849        assert!(read("9999999999999999999999").is_err());
850    }
851
852    #[test]
853    fn test_read_negative_float() {
854        assert_eq!(read("-2.5").unwrap(), Value::float(-2.5));
855    }
856
857    #[test]
858    fn test_read_float_leading_zero() {
859        assert_eq!(read("0.5").unwrap(), Value::float(0.5));
860    }
861
862    #[test]
863    fn test_read_minus_is_symbol() {
864        // Bare `-` should be a symbol (subtraction operator), not a number
865        assert_eq!(read("-").unwrap(), Value::symbol("-"));
866    }
867
868    #[test]
869    fn test_read_minus_in_list() {
870        // `(- 3)` should parse as call to `-` with arg 3
871        let result = read("(- 3)").unwrap();
872        assert_eq!(result, Value::list(vec![Value::symbol("-"), Value::int(3)]));
873    }
874
875    #[test]
876    fn test_read_negative_in_list() {
877        // `(-3)` should parse as list containing -3
878        let result = read("(-3)").unwrap();
879        assert_eq!(result, Value::list(vec![Value::int(-3)]));
880    }
881
882    #[test]
883    fn test_read_empty_string() {
884        assert_eq!(read(r#""""#).unwrap(), Value::string(""));
885    }
886
887    #[test]
888    fn test_read_string_with_escapes() {
889        assert_eq!(
890            read(r#""\n\t\r\\\"" "#).unwrap(),
891            Value::string("\n\t\r\\\"")
892        );
893    }
894
895    #[test]
896    fn test_read_string_unknown_escape() {
897        // Unknown escape sequences are preserved literally
898        assert_eq!(read(r#""\z""#).unwrap(), Value::string("\\z"));
899    }
900
901    #[test]
902    fn test_read_string_with_newline() {
903        assert_eq!(
904            read("\"line1\nline2\"").unwrap(),
905            Value::string("line1\nline2")
906        );
907    }
908
909    #[test]
910    fn test_read_unterminated_string() {
911        assert!(read("\"hello").is_err());
912    }
913
914    #[test]
915    fn test_read_string_escaped_quote_at_end() {
916        // `"test\"` — the backslash escapes the quote, string is unterminated
917        assert!(read(r#""test\""#).is_err());
918    }
919
920    #[test]
921    fn test_read_string_with_unicode() {
922        assert_eq!(read("\"héllo\"").unwrap(), Value::string("héllo"));
923        assert_eq!(read("\"日本語\"").unwrap(), Value::string("日本語"));
924        assert_eq!(read("\"🎉\"").unwrap(), Value::string("🎉"));
925    }
926
927    #[test]
928    fn test_read_string_with_parens() {
929        assert_eq!(read("\"(+ 1 2)\"").unwrap(), Value::string("(+ 1 2)"));
930    }
931
932    #[test]
933    fn test_read_operator_symbols() {
934        assert_eq!(read("+").unwrap(), Value::symbol("+"));
935        assert_eq!(read("*").unwrap(), Value::symbol("*"));
936        assert_eq!(read("/").unwrap(), Value::symbol("/"));
937        assert_eq!(read("<=").unwrap(), Value::symbol("<="));
938        assert_eq!(read(">=").unwrap(), Value::symbol(">="));
939    }
940
941    #[test]
942    fn test_read_predicate_symbols() {
943        assert_eq!(read("null?").unwrap(), Value::symbol("null?"));
944        assert_eq!(read("list?").unwrap(), Value::symbol("list?"));
945    }
946
947    #[test]
948    fn test_read_arrow_symbols() {
949        assert_eq!(
950            read("string->symbol").unwrap(),
951            Value::symbol("string->symbol")
952        );
953    }
954
955    #[test]
956    fn test_read_namespaced_symbols() {
957        assert_eq!(read("file/read").unwrap(), Value::symbol("file/read"));
958        assert_eq!(read("http/get").unwrap(), Value::symbol("http/get"));
959    }
960
961    #[test]
962    fn test_read_true_false_as_bool() {
963        assert_eq!(read("true").unwrap(), Value::bool(true));
964        assert_eq!(read("false").unwrap(), Value::bool(false));
965    }
966
967    #[test]
968    fn test_read_bare_colon_error() {
969        // `:` alone without a name should error
970        assert!(read(":").is_err());
971    }
972
973    #[test]
974    fn test_read_keyword_with_numbers() {
975        assert_eq!(read(":foo123").unwrap(), Value::keyword("foo123"));
976    }
977
978    #[test]
979    fn test_read_keyword_with_hyphens() {
980        assert_eq!(read(":max-turns").unwrap(), Value::keyword("max-turns"));
981    }
982
983    #[test]
984    fn test_read_hash_invalid() {
985        assert!(read("#x").is_err());
986        assert!(read("#").is_err());
987    }
988
989    #[test]
990    fn test_read_empty() {
991        assert_eq!(read("").unwrap(), Value::nil());
992    }
993
994    #[test]
995    fn test_read_whitespace_only() {
996        assert_eq!(read("   \n\t  ").unwrap(), Value::nil());
997    }
998
999    #[test]
1000    fn test_read_many_empty() {
1001        assert_eq!(read_many("").unwrap(), vec![]);
1002    }
1003
1004    #[test]
1005    fn test_read_many_whitespace_only() {
1006        assert_eq!(read_many("  \n  ").unwrap(), vec![]);
1007    }
1008
1009    #[test]
1010    fn test_read_comment_only() {
1011        assert_eq!(read_many("; just a comment").unwrap(), vec![]);
1012    }
1013
1014    #[test]
1015    fn test_read_empty_list() {
1016        assert_eq!(read("()").unwrap(), Value::list(vec![]));
1017    }
1018
1019    #[test]
1020    fn test_read_deeply_nested() {
1021        let result = read("((((42))))").unwrap();
1022        assert_eq!(
1023            result,
1024            Value::list(vec![Value::list(vec![Value::list(vec![Value::list(
1025                vec![Value::int(42)]
1026            )])])])
1027        );
1028    }
1029
1030    #[test]
1031    fn test_read_unterminated_list() {
1032        assert!(read("(1 2").is_err());
1033    }
1034
1035    #[test]
1036    fn test_read_extra_rparen() {
1037        // `read` only reads one expr, so extra `)` is just ignored (not consumed)
1038        // But `read_many` should fail since `)` is not a valid expr start
1039        let result = read("42").unwrap();
1040        assert_eq!(result, Value::int(42));
1041    }
1042
1043    #[test]
1044    fn test_read_dotted_pair() {
1045        let result = read("(a . b)").unwrap();
1046        assert_eq!(
1047            result,
1048            Value::list(vec![
1049                Value::symbol("a"),
1050                Value::symbol("."),
1051                Value::symbol("b")
1052            ])
1053        );
1054    }
1055
1056    #[test]
1057    fn test_read_empty_vector() {
1058        assert_eq!(read("[]").unwrap(), Value::vector(vec![]));
1059    }
1060
1061    #[test]
1062    fn test_read_unterminated_vector() {
1063        assert!(read("[1 2").is_err());
1064    }
1065
1066    #[test]
1067    fn test_read_empty_map() {
1068        assert_eq!(read("{}").unwrap(), Value::map(BTreeMap::new()));
1069    }
1070
1071    #[test]
1072    fn test_read_unterminated_map() {
1073        assert!(read("{:a 1").is_err());
1074    }
1075
1076    #[test]
1077    fn test_read_map_odd_elements() {
1078        assert!(read("{:a 1 :b}").is_err());
1079    }
1080
1081    #[test]
1082    fn test_read_map_duplicate_keys() {
1083        // Later key wins (BTreeMap insert replaces)
1084        let result = read("{:a 1 :a 2}").unwrap();
1085        let mut expected = BTreeMap::new();
1086        expected.insert(Value::keyword("a"), Value::int(2));
1087        assert_eq!(result, Value::map(expected));
1088    }
1089
1090    #[test]
1091    fn test_read_nested_quote() {
1092        let result = read("''foo").unwrap();
1093        assert_eq!(
1094            result,
1095            Value::list(vec![
1096                Value::symbol("quote"),
1097                Value::list(vec![Value::symbol("quote"), Value::symbol("foo")])
1098            ])
1099        );
1100    }
1101
1102    #[test]
1103    fn test_read_quote_list() {
1104        let result = read("'(1 2 3)").unwrap();
1105        assert_eq!(
1106            result,
1107            Value::list(vec![
1108                Value::symbol("quote"),
1109                Value::list(vec![Value::int(1), Value::int(2), Value::int(3)])
1110            ])
1111        );
1112    }
1113
1114    #[test]
1115    fn test_read_quote_at_eof() {
1116        assert!(read("'").is_err());
1117    }
1118
1119    #[test]
1120    fn test_read_unquote_at_eof() {
1121        assert!(read(",").is_err());
1122    }
1123
1124    #[test]
1125    fn test_read_unquote_splice_at_eof() {
1126        assert!(read(",@").is_err());
1127    }
1128
1129    #[test]
1130    fn test_read_quasiquote_at_eof() {
1131        assert!(read("`").is_err());
1132    }
1133
1134    #[test]
1135    fn test_read_comment_after_expr() {
1136        assert_eq!(read_many("42 ; comment").unwrap(), vec![Value::int(42)]);
1137    }
1138
1139    #[test]
1140    fn test_read_multiple_comments() {
1141        let result = read_many("; first\n; second\n42").unwrap();
1142        assert_eq!(result, vec![Value::int(42)]);
1143    }
1144
1145    #[test]
1146    fn test_read_comment_no_newline() {
1147        // Comment at end of input without trailing newline
1148        assert_eq!(read_many("; comment").unwrap(), vec![]);
1149    }
1150
1151    #[test]
1152    fn test_read_crlf_line_endings() {
1153        let result = read_many("1\r\n2\r\n3").unwrap();
1154        assert_eq!(result, vec![Value::int(1), Value::int(2), Value::int(3)]);
1155    }
1156
1157    #[test]
1158    fn test_read_tabs_as_whitespace() {
1159        assert_eq!(
1160            read("(\t+\t1\t2\t)").unwrap(),
1161            Value::list(vec![Value::symbol("+"), Value::int(1), Value::int(2)])
1162        );
1163    }
1164
1165    #[test]
1166    fn test_read_mixed_collections() {
1167        // List containing vector and map
1168        let result = read("([1 2] {:a 3})").unwrap();
1169        let mut map = BTreeMap::new();
1170        map.insert(Value::keyword("a"), Value::int(3));
1171        assert_eq!(
1172            result,
1173            Value::list(vec![
1174                Value::vector(vec![Value::int(1), Value::int(2)]),
1175                Value::map(map)
1176            ])
1177        );
1178    }
1179
1180    #[test]
1181    fn test_read_many_mixed_types() {
1182        let result = read_many(r#"42 3.14 "hello" foo :bar #t nil"#).unwrap();
1183        assert_eq!(result.len(), 7);
1184        assert_eq!(result[0], Value::int(42));
1185        assert_eq!(result[1], Value::float(3.14));
1186        assert_eq!(result[2], Value::string("hello"));
1187        assert_eq!(result[3], Value::symbol("foo"));
1188        assert_eq!(result[4], Value::keyword("bar"));
1189        assert_eq!(result[5], Value::bool(true));
1190        assert_eq!(result[6], Value::nil());
1191    }
1192
1193    #[test]
1194    fn test_span_map_tracks_lists() {
1195        let (exprs, spans) = read_many_with_spans("(+ 1 2)").unwrap();
1196        assert_eq!(exprs.len(), 1);
1197        // The list should have a span entry
1198        let rc = exprs[0].as_list_rc().expect("expected list");
1199        let ptr = Rc::as_ptr(&rc) as usize;
1200        let span = spans.get(&ptr).expect("list should have span");
1201        assert_eq!(span.line, 1);
1202        assert_eq!(span.col, 1);
1203    }
1204
1205    #[test]
1206    fn test_span_map_multiline() {
1207        let (exprs, spans) = read_many_with_spans("(foo)\n(bar)").unwrap();
1208        assert_eq!(exprs.len(), 2);
1209        let rc = exprs[1].as_list_rc().expect("expected list");
1210        let ptr = Rc::as_ptr(&rc) as usize;
1211        let span = spans.get(&ptr).expect("second list should have span");
1212        assert_eq!(span.line, 2);
1213        assert_eq!(span.col, 1);
1214    }
1215
1216    #[test]
1217    fn test_read_unexpected_char() {
1218        assert!(read("@").is_err());
1219        assert!(read("$").is_err());
1220    }
1221
1222    #[test]
1223    fn test_read_char_literal() {
1224        assert_eq!(read("#\\a").unwrap(), Value::char('a'));
1225        assert_eq!(read("#\\Z").unwrap(), Value::char('Z'));
1226        assert_eq!(read("#\\0").unwrap(), Value::char('0'));
1227    }
1228
1229    #[test]
1230    fn test_read_char_named() {
1231        assert_eq!(read("#\\space").unwrap(), Value::char(' '));
1232        assert_eq!(read("#\\newline").unwrap(), Value::char('\n'));
1233        assert_eq!(read("#\\tab").unwrap(), Value::char('\t'));
1234        assert_eq!(read("#\\return").unwrap(), Value::char('\r'));
1235        assert_eq!(read("#\\nul").unwrap(), Value::char('\0'));
1236    }
1237
1238    #[test]
1239    fn test_read_char_special() {
1240        assert_eq!(read("#\\(").unwrap(), Value::char('('));
1241        assert_eq!(read("#\\)").unwrap(), Value::char(')'));
1242    }
1243
1244    #[test]
1245    fn test_read_char_in_list() {
1246        let result = read("(#\\a #\\b)").unwrap();
1247        assert_eq!(
1248            result,
1249            Value::list(vec![Value::char('a'), Value::char('b')])
1250        );
1251    }
1252
1253    #[test]
1254    fn test_read_char_unknown_name() {
1255        assert!(read("#\\foobar").is_err());
1256    }
1257
1258    #[test]
1259    fn test_read_char_eof() {
1260        assert!(read("#\\").is_err());
1261    }
1262
1263    #[test]
1264    fn test_read_bytevector_literal() {
1265        assert_eq!(
1266            read("#u8(1 2 3)").unwrap(),
1267            Value::bytevector(vec![1, 2, 3])
1268        );
1269    }
1270
1271    #[test]
1272    fn test_read_bytevector_empty() {
1273        assert_eq!(read("#u8()").unwrap(), Value::bytevector(vec![]));
1274    }
1275
1276    #[test]
1277    fn test_read_bytevector_single() {
1278        assert_eq!(read("#u8(255)").unwrap(), Value::bytevector(vec![255]));
1279    }
1280
1281    #[test]
1282    fn test_read_bytevector_out_of_range() {
1283        assert!(read("#u8(256)").is_err());
1284    }
1285
1286    #[test]
1287    fn test_read_bytevector_negative() {
1288        assert!(read("#u8(-1)").is_err());
1289    }
1290
1291    #[test]
1292    fn test_read_bytevector_non_integer() {
1293        assert!(read("#u8(1.5)").is_err());
1294    }
1295
1296    #[test]
1297    fn test_read_bytevector_unterminated() {
1298        assert!(read("#u8(1 2").is_err());
1299    }
1300
1301    #[test]
1302    fn test_read_bytevector_in_list() {
1303        let result = read("(#u8(1 2) #u8(3))").unwrap();
1304        assert_eq!(
1305            result,
1306            Value::list(vec![
1307                Value::bytevector(vec![1, 2]),
1308                Value::bytevector(vec![3]),
1309            ])
1310        );
1311    }
1312
1313    #[test]
1314    fn test_read_string_hex_escape_basic() {
1315        // \x41; is 'A'
1316        let result = read(r#""\x41;""#).unwrap();
1317        assert_eq!(result, Value::string("A"));
1318    }
1319
1320    #[test]
1321    fn test_read_string_hex_escape_lowercase() {
1322        let result = read(r#""\x6c;""#).unwrap();
1323        assert_eq!(result, Value::string("l"));
1324    }
1325
1326    #[test]
1327    fn test_read_string_hex_escape_mixed_case() {
1328        let result = read(r#""\x4F;""#).unwrap();
1329        assert_eq!(result, Value::string("O"));
1330    }
1331
1332    #[test]
1333    fn test_read_string_hex_escape_esc_char() {
1334        // \x1B; is ESC (0x1b) — the main motivating use case
1335        let result = read(r#""\x1B;""#).unwrap();
1336        assert_eq!(result, Value::string("\x1B"));
1337    }
1338
1339    #[test]
1340    fn test_read_string_hex_escape_null() {
1341        let result = read(r#""\x0;""#).unwrap();
1342        assert_eq!(result, Value::string("\0"));
1343    }
1344
1345    #[test]
1346    fn test_read_string_hex_escape_unicode() {
1347        // \x3BB; is λ (Greek small letter lambda)
1348        let result = read(r#""\x3BB;""#).unwrap();
1349        assert_eq!(result, Value::string("λ"));
1350    }
1351
1352    #[test]
1353    fn test_read_string_hex_escape_emoji() {
1354        // \x1F600; is 😀
1355        let result = read(r#""\x1F600;""#).unwrap();
1356        assert_eq!(result, Value::string("😀"));
1357    }
1358
1359    #[test]
1360    fn test_read_string_hex_escape_in_context() {
1361        // Mix hex escapes with regular text and other escapes
1362        let result = read(r#""hello\x20;world""#).unwrap();
1363        assert_eq!(result, Value::string("hello world"));
1364    }
1365
1366    #[test]
1367    fn test_read_string_hex_escape_multiple() {
1368        let result = read(r#""\x48;\x69;""#).unwrap();
1369        assert_eq!(result, Value::string("Hi"));
1370    }
1371
1372    #[test]
1373    fn test_read_string_hex_escape_missing_semicolon() {
1374        assert!(read(r#""\x41""#).is_err());
1375    }
1376
1377    #[test]
1378    fn test_read_string_hex_escape_no_digits() {
1379        assert!(read(r#""\x;""#).is_err());
1380    }
1381
1382    #[test]
1383    fn test_read_string_hex_escape_invalid_hex() {
1384        assert!(read(r#""\xGG;""#).is_err());
1385    }
1386
1387    #[test]
1388    fn test_read_string_hex_escape_invalid_codepoint() {
1389        // 0xD800 is a surrogate — invalid Unicode scalar
1390        assert!(read(r#""\xD800;""#).is_err());
1391    }
1392
1393    #[test]
1394    fn test_read_string_hex_escape_too_large() {
1395        // 0x110000 is above Unicode max
1396        assert!(read(r#""\x110000;""#).is_err());
1397    }
1398
1399    #[test]
1400    fn test_read_string_u_escape_basic() {
1401        // \u0041 is 'A'
1402        let result = read(r#""\u0041""#).unwrap();
1403        assert_eq!(result, Value::string("A"));
1404    }
1405
1406    #[test]
1407    fn test_read_string_u_escape_lambda() {
1408        let result = read(r#""\u03BB""#).unwrap();
1409        assert_eq!(result, Value::string("λ"));
1410    }
1411
1412    #[test]
1413    fn test_read_string_u_escape_esc() {
1414        let result = read(r#""\u001B""#).unwrap();
1415        assert_eq!(result, Value::string("\x1B"));
1416    }
1417
1418    #[test]
1419    fn test_read_string_u_escape_too_few_digits() {
1420        assert!(read(r#""\u041""#).is_err());
1421    }
1422
1423    #[test]
1424    fn test_read_string_u_escape_surrogate() {
1425        assert!(read(r#""\uD800""#).is_err());
1426    }
1427
1428    #[test]
1429    fn test_read_string_big_u_escape_basic() {
1430        let result = read(r#""\U00000041""#).unwrap();
1431        assert_eq!(result, Value::string("A"));
1432    }
1433
1434    #[test]
1435    fn test_read_string_big_u_escape_emoji() {
1436        let result = read(r#""\U0001F600""#).unwrap();
1437        assert_eq!(result, Value::string("😀"));
1438    }
1439
1440    #[test]
1441    fn test_read_string_big_u_escape_too_few_digits() {
1442        assert!(read(r#""\U0041""#).is_err());
1443    }
1444
1445    #[test]
1446    fn test_read_string_big_u_escape_invalid() {
1447        assert!(read(r#""\U00110000""#).is_err());
1448    }
1449
1450    #[test]
1451    fn test_read_string_null_escape() {
1452        let result = read(r#""\0""#).unwrap();
1453        assert_eq!(result, Value::string("\0"));
1454    }
1455
1456    #[test]
1457    fn test_read_string_mixed_escapes() {
1458        // Mix all escape types in one string
1459        let result = read(r#""\x48;\u0069\n\t""#).unwrap();
1460        assert_eq!(result, Value::string("Hi\n\t"));
1461    }
1462
1463    #[test]
1464    fn test_read_string_ansi_escape_sequence() {
1465        // Real-world: ANSI color code ESC[31m (red)
1466        let result = read(r#""\x1B;[31mRed\x1B;[0m""#).unwrap();
1467        assert_eq!(result, Value::string("\x1B[31mRed\x1B[0m"));
1468    }
1469
1470    // ── f-string tests ──
1471
1472    #[test]
1473    fn test_read_fstring_no_interpolation() {
1474        let result = read(r#"f"hello""#).unwrap();
1475        assert_eq!(
1476            result,
1477            Value::list(vec![Value::symbol("str"), Value::string("hello")])
1478        );
1479    }
1480
1481    #[test]
1482    fn test_read_fstring_single_var() {
1483        let result = read(r#"f"hello ${name}""#).unwrap();
1484        assert_eq!(
1485            result,
1486            Value::list(vec![
1487                Value::symbol("str"),
1488                Value::string("hello "),
1489                Value::symbol("name"),
1490            ])
1491        );
1492    }
1493
1494    #[test]
1495    fn test_read_fstring_multiple_vars() {
1496        let result = read(r#"f"${a} and ${b}""#).unwrap();
1497        assert_eq!(
1498            result,
1499            Value::list(vec![
1500                Value::symbol("str"),
1501                Value::symbol("a"),
1502                Value::string(" and "),
1503                Value::symbol("b"),
1504            ])
1505        );
1506    }
1507
1508    #[test]
1509    fn test_read_fstring_expression() {
1510        let result = read(r#"f"result: ${(+ 1 2)}""#).unwrap();
1511        assert_eq!(
1512            result,
1513            Value::list(vec![
1514                Value::symbol("str"),
1515                Value::string("result: "),
1516                Value::list(vec![Value::symbol("+"), Value::int(1), Value::int(2),]),
1517            ])
1518        );
1519    }
1520
1521    #[test]
1522    fn test_read_fstring_escaped_dollar() {
1523        let result = read(r#"f"costs \$5""#).unwrap();
1524        assert_eq!(
1525            result,
1526            Value::list(vec![Value::symbol("str"), Value::string("costs $5")])
1527        );
1528    }
1529
1530    #[test]
1531    fn test_read_fstring_dollar_without_brace() {
1532        let result = read(r#"f"costs $5""#).unwrap();
1533        assert_eq!(
1534            result,
1535            Value::list(vec![Value::symbol("str"), Value::string("costs $5")])
1536        );
1537    }
1538
1539    #[test]
1540    fn test_read_fstring_escape_sequences() {
1541        let result = read(r#"f"line1\nline2""#).unwrap();
1542        assert_eq!(
1543            result,
1544            Value::list(vec![Value::symbol("str"), Value::string("line1\nline2"),])
1545        );
1546    }
1547
1548    #[test]
1549    fn test_read_fstring_empty_interpolation_error() {
1550        assert!(read(r#"f"hello ${}""#).is_err());
1551    }
1552
1553    #[test]
1554    fn test_read_fstring_unterminated_interpolation_error() {
1555        assert!(read(r#"f"hello ${name""#).is_err());
1556    }
1557
1558    #[test]
1559    fn test_read_fstring_unterminated_string_error() {
1560        assert!(read(r#"f"hello"#).is_err());
1561    }
1562
1563    #[test]
1564    fn test_read_fstring_keyword_access() {
1565        let result = read(r#"f"name: ${(:name user)}""#).unwrap();
1566        assert_eq!(
1567            result,
1568            Value::list(vec![
1569                Value::symbol("str"),
1570                Value::string("name: "),
1571                Value::list(vec![Value::keyword("name"), Value::symbol("user")]),
1572            ])
1573        );
1574    }
1575
1576    #[test]
1577    fn test_read_fstring_in_list() {
1578        let result = read(r#"(println f"hello ${name}")"#).unwrap();
1579        assert_eq!(
1580            result,
1581            Value::list(vec![
1582                Value::symbol("println"),
1583                Value::list(vec![
1584                    Value::symbol("str"),
1585                    Value::string("hello "),
1586                    Value::symbol("name"),
1587                ]),
1588            ])
1589        );
1590    }
1591
1592    #[test]
1593    fn test_read_fstring_empty() {
1594        let result = read(r#"f"""#).unwrap();
1595        assert_eq!(result, Value::list(vec![Value::symbol("str")]));
1596    }
1597
1598    #[test]
1599    fn test_read_fstring_only_expr() {
1600        let result = read(r#"f"${x}""#).unwrap();
1601        assert_eq!(
1602            result,
1603            Value::list(vec![Value::symbol("str"), Value::symbol("x")])
1604        );
1605    }
1606
1607    #[test]
1608    fn test_read_f_symbol_still_works() {
1609        // Plain 'f' symbol (not followed by '"') should still parse as symbol
1610        let result = read("f").unwrap();
1611        assert_eq!(result, Value::symbol("f"));
1612    }
1613
1614    #[test]
1615    fn test_read_f_prefixed_symbol_still_works() {
1616        // 'foo' should still parse as a normal symbol
1617        let result = read("foo").unwrap();
1618        assert_eq!(result, Value::symbol("foo"));
1619    }
1620
1621    // ── short lambda tests ──
1622
1623    #[test]
1624    fn test_read_short_lambda_single_arg() {
1625        // #(+ % 1) → (lambda (%1) (+ %1 1))
1626        let result = read("#(+ % 1)").unwrap();
1627        assert_eq!(
1628            result,
1629            Value::list(vec![
1630                Value::symbol("lambda"),
1631                Value::list(vec![Value::symbol("%1")]),
1632                Value::list(vec![Value::symbol("+"), Value::symbol("%1"), Value::int(1),]),
1633            ])
1634        );
1635    }
1636
1637    #[test]
1638    fn test_read_short_lambda_two_args() {
1639        // #(+ %1 %2) → (lambda (%1 %2) (+ %1 %2))
1640        let result = read("#(+ %1 %2)").unwrap();
1641        assert_eq!(
1642            result,
1643            Value::list(vec![
1644                Value::symbol("lambda"),
1645                Value::list(vec![Value::symbol("%1"), Value::symbol("%2")]),
1646                Value::list(vec![
1647                    Value::symbol("+"),
1648                    Value::symbol("%1"),
1649                    Value::symbol("%2"),
1650                ]),
1651            ])
1652        );
1653    }
1654
1655    #[test]
1656    fn test_read_short_lambda_bare_percent_is_percent1() {
1657        // #(* % %) → (lambda (%1) (* %1 %1))
1658        let result = read("#(* % %)").unwrap();
1659        assert_eq!(
1660            result,
1661            Value::list(vec![
1662                Value::symbol("lambda"),
1663                Value::list(vec![Value::symbol("%1")]),
1664                Value::list(vec![
1665                    Value::symbol("*"),
1666                    Value::symbol("%1"),
1667                    Value::symbol("%1"),
1668                ]),
1669            ])
1670        );
1671    }
1672
1673    #[test]
1674    fn test_read_short_lambda_no_args() {
1675        // #(println "hello") → (lambda () (println "hello"))
1676        let result = read(r#"#(println "hello")"#).unwrap();
1677        assert_eq!(
1678            result,
1679            Value::list(vec![
1680                Value::symbol("lambda"),
1681                Value::list(vec![]),
1682                Value::list(vec![Value::symbol("println"), Value::string("hello"),]),
1683            ])
1684        );
1685    }
1686
1687    #[test]
1688    fn test_read_short_lambda_in_list() {
1689        // (map #(+ % 1) numbers)
1690        let result = read("(map #(+ % 1) numbers)").unwrap();
1691        assert_eq!(
1692            result,
1693            Value::list(vec![
1694                Value::symbol("map"),
1695                Value::list(vec![
1696                    Value::symbol("lambda"),
1697                    Value::list(vec![Value::symbol("%1")]),
1698                    Value::list(vec![Value::symbol("+"), Value::symbol("%1"), Value::int(1),]),
1699                ]),
1700                Value::symbol("numbers"),
1701            ])
1702        );
1703    }
1704
1705    #[test]
1706    fn test_read_short_lambda_unterminated() {
1707        assert!(read("#(+ % 1").is_err());
1708    }
1709
1710    #[test]
1711    fn test_read_short_lambda_nested_expr() {
1712        // #(> (string-length %) 3) → (lambda (%1) (> (string-length %1) 3))
1713        let result = read("#(> (string-length %) 3)").unwrap();
1714        assert_eq!(
1715            result,
1716            Value::list(vec![
1717                Value::symbol("lambda"),
1718                Value::list(vec![Value::symbol("%1")]),
1719                Value::list(vec![
1720                    Value::symbol(">"),
1721                    Value::list(vec![Value::symbol("string-length"), Value::symbol("%1"),]),
1722                    Value::int(3),
1723                ]),
1724            ])
1725        );
1726    }
1727
1728    #[test]
1729    fn test_read_regex_literal_digits() {
1730        let result = read(r#"#"\d+""#).unwrap();
1731        assert_eq!(result, Value::string(r"\d+"));
1732    }
1733
1734    #[test]
1735    fn test_read_regex_literal_char_class() {
1736        let result = read(r#"#"[a-z]+""#).unwrap();
1737        assert_eq!(result, Value::string("[a-z]+"));
1738    }
1739
1740    #[test]
1741    fn test_read_regex_literal_backslashes_literal() {
1742        let result = read(r#"#"hello\.world""#).unwrap();
1743        assert_eq!(result, Value::string(r"hello\.world"));
1744    }
1745
1746    #[test]
1747    fn test_read_regex_literal_escaped_quote() {
1748        let result = read(r#"#"foo\"bar""#).unwrap();
1749        assert_eq!(result, Value::string(r#"foo"bar"#));
1750    }
1751
1752    #[test]
1753    fn test_read_regex_literal_unterminated() {
1754        assert!(read(r#"#"abc"#).is_err());
1755    }
1756
1757    #[test]
1758    fn test_mismatched_paren_bracket() {
1759        let err = read("(list [1 2 3)").unwrap_err();
1760        let msg = err.to_string();
1761        assert!(
1762            msg.contains("mismatched"),
1763            "expected mismatched error, got: {msg}"
1764        );
1765    }
1766
1767    #[test]
1768    fn test_mismatched_bracket_paren() {
1769        let err = read("[1 2 3)").unwrap_err();
1770        let msg = err.to_string();
1771        assert!(
1772            msg.contains("mismatched"),
1773            "expected mismatched error, got: {msg}"
1774        );
1775    }
1776
1777    #[test]
1778    fn test_mismatched_paren_brace() {
1779        let err = read("(+ 1 2}").unwrap_err();
1780        let msg = err.to_string();
1781        assert!(
1782            msg.contains("mismatched"),
1783            "expected mismatched error, got: {msg}"
1784        );
1785    }
1786
1787    #[test]
1788    fn test_mismatched_brace_paren() {
1789        let err = read("{:a 1)").unwrap_err();
1790        let msg = err.to_string();
1791        assert!(
1792            msg.contains("mismatched"),
1793            "expected mismatched error, got: {msg}"
1794        );
1795    }
1796
1797    #[test]
1798    fn test_mismatched_brace_bracket() {
1799        let err = read("{:a 1]").unwrap_err();
1800        let msg = err.to_string();
1801        assert!(
1802            msg.contains("mismatched"),
1803            "expected mismatched error, got: {msg}"
1804        );
1805    }
1806
1807    #[test]
1808    fn test_mismatched_bracket_brace() {
1809        let err = read("[1 2}").unwrap_err();
1810        let msg = err.to_string();
1811        assert!(
1812            msg.contains("mismatched"),
1813            "expected mismatched error, got: {msg}"
1814        );
1815    }
1816
1817    #[test]
1818    fn test_correct_brackets_still_work() {
1819        assert!(read("(list [1 2 3])").is_ok());
1820        assert!(read("{:a 1}").is_ok());
1821        assert!(read("[1 [2 3] 4]").is_ok());
1822    }
1823
1824    #[test]
1825    fn test_auto_gensym_symbol_parsing() {
1826        let val = read("v#").unwrap();
1827        assert_eq!(val.as_symbol().unwrap(), "v#");
1828
1829        let val = read("tmp#").unwrap();
1830        assert_eq!(val.as_symbol().unwrap(), "tmp#");
1831
1832        let val = read("`(let ((v# 1)) v#)").unwrap();
1833        let items = val.as_list().unwrap();
1834        assert_eq!(items[0].as_symbol().unwrap(), "quasiquote");
1835    }
1836
1837    #[test]
1838    fn test_hash_reader_dispatch_still_works() {
1839        let val = read("#t").unwrap();
1840        assert_eq!(val.as_bool(), Some(true));
1841
1842        let val = read("#f").unwrap();
1843        assert_eq!(val.as_bool(), Some(false));
1844
1845        let val = read("#\\space").unwrap();
1846        assert_eq!(val.as_char(), Some(' '));
1847
1848        let val = read("#(+ % 1)").unwrap();
1849        assert!(val.as_list().is_some());
1850    }
1851
1852    #[test]
1853    fn test_auto_gensym_edge_cases() {
1854        let val = read("x##").unwrap();
1855        assert_eq!(val.as_symbol().unwrap(), "x##");
1856
1857        let val = read(":foo").unwrap();
1858        assert!(val.as_keyword().is_some());
1859    }
1860
1861    // ── Error recovery tests ─────────────────────────────────────
1862
1863    #[test]
1864    fn recover_valid_input_no_errors() {
1865        let (exprs, _, _, errors) = read_many_with_spans_recover("(+ 1 2) (- 3 4)");
1866        assert!(errors.is_empty());
1867        assert_eq!(exprs.len(), 2);
1868    }
1869
1870    #[test]
1871    fn recover_stray_closer_then_valid() {
1872        // Stray `)` then a valid form
1873        let (exprs, _, _, errors) = read_many_with_spans_recover(") (+ 1 2)");
1874        assert_eq!(errors.len(), 1);
1875        assert_eq!(exprs.len(), 1);
1876    }
1877
1878    #[test]
1879    fn recover_unclosed_then_valid() {
1880        // Unclosed list, then a valid form on the next line
1881        let (_exprs, _, _, errors) = read_many_with_spans_recover("(define x\n(+ 1 2)");
1882        // The first `(define x` consumes tokens including `(+ 1 2)` as part of
1883        // its unterminated body, then hits EOF → 1 error, the (+ 1 2) is inside it
1884        assert_eq!(errors.len(), 1);
1885        // The second form got consumed by the unterminated first form
1886        // so recovery can't salvage it — this is expected
1887    }
1888
1889    #[test]
1890    fn recover_multiple_stray_closers() {
1891        let (exprs, _, _, errors) = read_many_with_spans_recover(") ] } (define x 1)");
1892        assert_eq!(errors.len(), 3);
1893        assert_eq!(exprs.len(), 1);
1894        assert!(exprs[0].as_list().is_some());
1895    }
1896
1897    #[test]
1898    fn recover_mismatched_bracket() {
1899        // Mismatched bracket: ( closed with ]
1900        let (exprs, _, _, errors) = read_many_with_spans_recover("(define x] (+ 1 2)");
1901        assert!(!errors.is_empty());
1902        // After the mismatch error, recovery should find `(+ 1 2)`
1903        assert!(!exprs.is_empty());
1904    }
1905
1906    #[test]
1907    fn recover_empty_input() {
1908        let (exprs, _, _, errors) = read_many_with_spans_recover("");
1909        assert!(errors.is_empty());
1910        assert!(exprs.is_empty());
1911    }
1912
1913    #[test]
1914    fn recover_only_errors() {
1915        let (exprs, _, _, errors) = read_many_with_spans_recover(") )");
1916        assert_eq!(errors.len(), 2);
1917        assert!(exprs.is_empty());
1918    }
1919
1920    #[test]
1921    fn recover_valid_between_errors() {
1922        // error, valid, error
1923        let (exprs, _, _, errors) = read_many_with_spans_recover(") (+ 1 2) )");
1924        assert_eq!(errors.len(), 2);
1925        assert_eq!(exprs.len(), 1);
1926    }
1927
1928    // ── symbol span tracking ──
1929
1930    #[test]
1931    fn test_symbol_spans_basic() {
1932        let (_, _, sym_spans) = read_many_with_symbol_spans("(define x 42)").unwrap();
1933        // Should record "define" and "x" (not 42 — it's an int, not a symbol)
1934        let names: Vec<&str> = sym_spans.iter().map(|(n, _)| n.as_str()).collect();
1935        assert!(names.contains(&"define"), "missing define in {:?}", names);
1936        assert!(names.contains(&"x"), "missing x in {:?}", names);
1937        assert_eq!(names.len(), 2);
1938    }
1939
1940    #[test]
1941    fn test_symbol_spans_positions() {
1942        let (_, _, sym_spans) = read_many_with_symbol_spans("(defun foo (x) x)").unwrap();
1943        // "foo" should have a precise span
1944        let foo = sym_spans.iter().find(|(n, _)| n == "foo").unwrap();
1945        assert_eq!(foo.1.line, 1);
1946        assert_eq!(foo.1.col, 8); // 1-indexed: "(defun " = 7 chars, foo starts at col 8
1947    }
1948
1949    #[test]
1950    fn test_symbol_spans_no_synthetic() {
1951        // '(a b) desugars to (quote (a b)) — "quote" should NOT appear in symbol_spans
1952        let (_, _, sym_spans) = read_many_with_symbol_spans("'(a b)").unwrap();
1953        let names: Vec<&str> = sym_spans.iter().map(|(n, _)| n.as_str()).collect();
1954        assert!(
1955            !names.contains(&"quote"),
1956            "synthetic 'quote' should not be in symbol_spans"
1957        );
1958        assert!(names.contains(&"a"));
1959        assert!(names.contains(&"b"));
1960    }
1961
1962    #[test]
1963    fn test_symbol_spans_multiple_forms() {
1964        let (_, _, sym_spans) =
1965            read_many_with_symbol_spans("(define x 1)\n(defun f (a) a)").unwrap();
1966        let names: Vec<&str> = sym_spans.iter().map(|(n, _)| n.as_str()).collect();
1967        assert!(names.contains(&"define"));
1968        assert!(names.contains(&"x"));
1969        assert!(names.contains(&"defun"));
1970        assert!(names.contains(&"f"));
1971        assert!(names.contains(&"a"));
1972        // "a" should appear twice (param + body reference)
1973        assert_eq!(names.iter().filter(|&&n| n == "a").count(), 2);
1974    }
1975
1976    #[test]
1977    fn test_symbol_spans_nil_excluded() {
1978        // "nil" parses as Value::nil(), not a symbol — should not be in symbol_spans
1979        let (_, _, sym_spans) = read_many_with_symbol_spans("nil").unwrap();
1980        assert!(sym_spans.is_empty());
1981    }
1982}