Skip to main content

bop/
lexer.rs

1#[cfg(feature = "no_std")]
2use alloc::{format, string::String, vec::Vec};
3
4use crate::error::BopError;
5
/// One segment of an interpolated string literal, in source order.
#[derive(Debug, Clone, PartialEq)]
pub enum StringPart {
    /// Text copied into the result as-is (escape sequences already
    /// resolved by the lexer).
    Literal(String),
    /// The name that appeared between `{` and `}` inside the string.
    Variable(String),
}
11
12#[derive(Debug, Clone, PartialEq)]
13pub enum Token {
14    // Literals
15    /// Integer literal — a digit sequence with no `.` part.
16    /// Lexed to `i64` at scan time; the parser maps it to
17    /// `ExprKind::Int` which the engines evaluate as
18    /// `Value::Int`.
19    Int(i64),
20    Number(f64),
21    Str(String),
22    StringInterp(Vec<StringPart>),
23    True,
24    False,
25    None,
26
27    // Identifiers & Keywords
28    Ident(String),
29    Let,
30    Const,
31    Fn,
32    Return,
33    If,
34    Else,
35    While,
36    For,
37    In,
38    Repeat,
39    Break,
40    Continue,
41    Use,
42    As,
43    Struct,
44    Enum,
45    Match,
46    Try,
47
48    // Operators
49    Plus,
50    Minus,
51    Star,
52    Slash,
53    Percent,
54    EqEq,
55    BangEq,
56    Lt,
57    Gt,
58    LtEq,
59    GtEq,
60    AmpAmp,
61    PipePipe,
62    Bang,
63    Eq,
64    PlusEq,
65    MinusEq,
66    StarEq,
67    SlashEq,
68    PercentEq,
69
70    // Delimiters
71    LParen,
72    RParen,
73    LBracket,
74    RBracket,
75    LBrace,
76    RBrace,
77    Comma,
78    Colon,
79    ColonColon,
80    Dot,
81    DotDot,
82    Semicolon,
83    FatArrow,
84    Pipe,
85
86    // Internal (removed after auto-semicolons)
87    Newline,
88
89    Eof,
90}
91
92#[derive(Debug, Clone)]
93pub struct SpannedToken {
94    pub token: Token,
95    pub line: u32,
96    /// 1-indexed column where the token starts. Used by the
97    /// parser and runtime to point error carats at the exact
98    /// offending character rather than just the line.
99    pub column: u32,
100}
101
102pub fn lex(source: &str) -> Result<Vec<SpannedToken>, BopError> {
103    let mut lexer = Lexer::new(source);
104    let raw = lexer.lex_all()?;
105    Ok(insert_semicolons(raw))
106}
107
108fn triggers_semicolon(token: &Token) -> bool {
109    matches!(
110        token,
111        Token::Ident(_)
112            | Token::Int(_)
113            | Token::Number(_)
114            | Token::Str(_)
115            | Token::StringInterp(_)
116            | Token::True
117            | Token::False
118            | Token::None
119            | Token::Break
120            | Token::Continue
121            | Token::Return
122            | Token::RParen
123            | Token::RBracket
124            | Token::RBrace
125    )
126}
127
128fn insert_semicolons(raw: Vec<SpannedToken>) -> Vec<SpannedToken> {
129    let mut result: Vec<SpannedToken> = Vec::new();
130    for token in raw {
131        if token.token == Token::Newline {
132            if let Some(last) = result.last() {
133                if triggers_semicolon(&last.token) {
134                    result.push(SpannedToken {
135                        token: Token::Semicolon,
136                        line: token.line,
137                        column: token.column,
138                    });
139                }
140            }
141        } else {
142            result.push(token);
143        }
144    }
145    result
146}
147
/// Character-level scanner state.
struct Lexer {
    /// The whole input, pre-split into `char`s so it can be indexed.
    chars: Vec<char>,
    /// Index into `chars` of the next character to consume.
    pos: usize,
    /// Line of the next character to consume.
    line: u32,
    /// 1-indexed column of the *next* character to consume. `advance`
    /// increments it and resets it to 1 after each newline.
    column: u32,
}
156
157impl Lexer {
158    fn new(source: &str) -> Self {
159        Self {
160            chars: source.chars().collect(),
161            pos: 0,
162            line: 1,
163            column: 1,
164        }
165    }
166
167    fn peek(&self) -> Option<char> {
168        self.chars.get(self.pos).copied()
169    }
170
171    fn peek_next(&self) -> Option<char> {
172        self.chars.get(self.pos + 1).copied()
173    }
174
175    fn advance(&mut self) -> Option<char> {
176        let ch = self.chars.get(self.pos).copied()?;
177        self.pos += 1;
178        if ch == '\n' {
179            // The newline itself belongs to the line it
180            // terminates; the next character starts at column 1
181            // of the *following* line. `line` gets bumped by the
182            // lexer's dispatch loop when it sees `\n`, so we
183            // only reset the column here.
184            self.column = 1;
185        } else {
186            self.column += 1;
187        }
188        Some(ch)
189    }
190
191    fn error(&self, message: impl Into<String>) -> BopError {
192        BopError {
193            line: Some(self.line),
194            column: Some(self.column),
195            message: message.into(),
196            friendly_hint: None,
197            is_fatal: false,
198            is_try_return: false,
199        }
200    }
201
202    fn error_with_hint(
203        &self,
204        message: impl Into<String>,
205        hint: impl Into<String>,
206    ) -> BopError {
207        BopError {
208            line: Some(self.line),
209            column: Some(self.column),
210            message: message.into(),
211            friendly_hint: Some(hint.into()),
212            is_fatal: false,
213            is_try_return: false,
214        }
215    }
216
217    fn lex_all(&mut self) -> Result<Vec<SpannedToken>, BopError> {
218        let mut tokens = Vec::new();
219
220        loop {
221            // Skip whitespace (not newlines)
222            while let Some(ch) = self.peek() {
223                if ch == ' ' || ch == '\t' || ch == '\r' {
224                    self.advance();
225                } else {
226                    break;
227                }
228            }
229
230            let Some(ch) = self.peek() else {
231                tokens.push(SpannedToken {
232                    token: Token::Eof,
233                    line: self.line,
234                    column: self.column,
235                });
236                break;
237            };
238
239            // Capture the token's start position before we start
240            // consuming characters — `self.line` / `self.column`
241            // move as we advance.
242            let line = self.line;
243            let column = self.column;
244
245            match ch {
246                '\n' => {
247                    self.advance();
248                    self.line += 1;
249                    tokens.push(SpannedToken {
250                        token: Token::Newline,
251                        line,
252                    column,
253                    });
254                }
255
256                '"' => {
257                    tokens.push(SpannedToken {
258                        token: self.lex_string()?,
259                        line,
260                    column,
261                    });
262                }
263
264                '0'..='9' => {
265                    tokens.push(SpannedToken {
266                        token: self.lex_number()?,
267                        line,
268                    column,
269                    });
270                }
271
272                'a'..='z' | 'A'..='Z' | '_' => {
273                    tokens.push(SpannedToken {
274                        token: self.lex_ident_or_keyword(),
275                        line,
276                    column,
277                    });
278                }
279
280                '+' => {
281                    self.advance();
282                    if self.peek() == Some('=') {
283                        self.advance();
284                        tokens.push(SpannedToken {
285                            token: Token::PlusEq,
286                            line,
287                        column,
288                        });
289                    } else {
290                        tokens.push(SpannedToken {
291                            token: Token::Plus,
292                            line,
293                        column,
294                        });
295                    }
296                }
297                '-' => {
298                    self.advance();
299                    if self.peek() == Some('=') {
300                        self.advance();
301                        tokens.push(SpannedToken {
302                            token: Token::MinusEq,
303                            line,
304                        column,
305                        });
306                    } else {
307                        tokens.push(SpannedToken {
308                            token: Token::Minus,
309                            line,
310                        column,
311                        });
312                    }
313                }
314                '*' => {
315                    self.advance();
316                    if self.peek() == Some('=') {
317                        self.advance();
318                        tokens.push(SpannedToken {
319                            token: Token::StarEq,
320                            line,
321                        column,
322                        });
323                    } else {
324                        tokens.push(SpannedToken {
325                            token: Token::Star,
326                            line,
327                        column,
328                        });
329                    }
330                }
331                '/' => {
332                    self.advance();
333                    if self.peek() == Some('=') {
334                        self.advance();
335                        tokens.push(SpannedToken {
336                            token: Token::SlashEq,
337                            line,
338                        column,
339                        });
340                    } else if self.peek() == Some('/') {
341                        // `//` — line comment. Runs to end of
342                        // line; no block-comment form. (There's
343                        // no integer-division operator: `/`
344                        // always returns a `Number` even for
345                        // `Int / Int`, and users who want an
346                        // integer result write `int(a / b)`.)
347                        self.advance();
348                        while let Some(c) = self.peek() {
349                            if c == '\n' {
350                                break;
351                            }
352                            self.advance();
353                        }
354                    } else {
355                        tokens.push(SpannedToken {
356                            token: Token::Slash,
357                            line,
358                        column,
359                        });
360                    }
361                }
362                '%' => {
363                    self.advance();
364                    if self.peek() == Some('=') {
365                        self.advance();
366                        tokens.push(SpannedToken {
367                            token: Token::PercentEq,
368                            line,
369                        column,
370                        });
371                    } else {
372                        tokens.push(SpannedToken {
373                            token: Token::Percent,
374                            line,
375                        column,
376                        });
377                    }
378                }
379
380                '=' => {
381                    self.advance();
382                    if self.peek() == Some('=') {
383                        self.advance();
384                        tokens.push(SpannedToken {
385                            token: Token::EqEq,
386                            line,
387                        column,
388                        });
389                    } else if self.peek() == Some('>') {
390                        self.advance();
391                        tokens.push(SpannedToken {
392                            token: Token::FatArrow,
393                            line,
394                        column,
395                        });
396                    } else {
397                        tokens.push(SpannedToken {
398                            token: Token::Eq,
399                            line,
400                        column,
401                        });
402                    }
403                }
404                '!' => {
405                    self.advance();
406                    if self.peek() == Some('=') {
407                        self.advance();
408                        tokens.push(SpannedToken {
409                            token: Token::BangEq,
410                            line,
411                        column,
412                        });
413                    } else {
414                        tokens.push(SpannedToken {
415                            token: Token::Bang,
416                            line,
417                        column,
418                        });
419                    }
420                }
421                '<' => {
422                    self.advance();
423                    if self.peek() == Some('=') {
424                        self.advance();
425                        tokens.push(SpannedToken {
426                            token: Token::LtEq,
427                            line,
428                        column,
429                        });
430                    } else {
431                        tokens.push(SpannedToken {
432                            token: Token::Lt,
433                            line,
434                        column,
435                        });
436                    }
437                }
438                '>' => {
439                    self.advance();
440                    if self.peek() == Some('=') {
441                        self.advance();
442                        tokens.push(SpannedToken {
443                            token: Token::GtEq,
444                            line,
445                        column,
446                        });
447                    } else {
448                        tokens.push(SpannedToken {
449                            token: Token::Gt,
450                            line,
451                        column,
452                        });
453                    }
454                }
455
456                '&' => {
457                    self.advance();
458                    if self.peek() == Some('&') {
459                        self.advance();
460                        tokens.push(SpannedToken {
461                            token: Token::AmpAmp,
462                            line,
463                        column,
464                        });
465                    } else {
466                        return Err(
467                            self.error_with_hint("Unexpected `&`", "Did you mean `&&` (and)?")
468                        );
469                    }
470                }
471                '|' => {
472                    self.advance();
473                    if self.peek() == Some('|') {
474                        self.advance();
475                        tokens.push(SpannedToken {
476                            token: Token::PipePipe,
477                            line,
478                        column,
479                        });
480                    } else {
481                        // Single `|` is now the or-pattern
482                        // separator inside `match` arms. Parser
483                        // decides whether it's accepted in the
484                        // current context.
485                        tokens.push(SpannedToken {
486                            token: Token::Pipe,
487                            line,
488                        column,
489                        });
490                    }
491                }
492
493                '(' => {
494                    self.advance();
495                    tokens.push(SpannedToken {
496                        token: Token::LParen,
497                        line,
498                    column,
499                    });
500                }
501                ')' => {
502                    self.advance();
503                    tokens.push(SpannedToken {
504                        token: Token::RParen,
505                        line,
506                    column,
507                    });
508                }
509                '[' => {
510                    self.advance();
511                    tokens.push(SpannedToken {
512                        token: Token::LBracket,
513                        line,
514                    column,
515                    });
516                }
517                ']' => {
518                    self.advance();
519                    tokens.push(SpannedToken {
520                        token: Token::RBracket,
521                        line,
522                    column,
523                    });
524                }
525                '{' => {
526                    self.advance();
527                    tokens.push(SpannedToken {
528                        token: Token::LBrace,
529                        line,
530                    column,
531                    });
532                }
533                '}' => {
534                    self.advance();
535                    tokens.push(SpannedToken {
536                        token: Token::RBrace,
537                        line,
538                    column,
539                    });
540                }
541                ',' => {
542                    self.advance();
543                    tokens.push(SpannedToken {
544                        token: Token::Comma,
545                        line,
546                    column,
547                    });
548                }
549                ':' => {
550                    self.advance();
551                    if self.peek() == Some(':') {
552                        self.advance();
553                        tokens.push(SpannedToken {
554                            token: Token::ColonColon,
555                            line,
556                        column,
557                        });
558                    } else {
559                        tokens.push(SpannedToken {
560                            token: Token::Colon,
561                            line,
562                        column,
563                        });
564                    }
565                }
566                '.' => {
567                    self.advance();
568                    if self.peek() == Some('.') {
569                        self.advance();
570                        tokens.push(SpannedToken {
571                            token: Token::DotDot,
572                            line,
573                        column,
574                        });
575                    } else {
576                        tokens.push(SpannedToken {
577                            token: Token::Dot,
578                            line,
579                        column,
580                        });
581                    }
582                }
583                ';' => {
584                    self.advance();
585                    tokens.push(SpannedToken {
586                        token: Token::Semicolon,
587                        line,
588                    column,
589                    });
590                }
591
592                _ => {
593                    return Err(self.error(format!("I don't understand the character `{}`", ch)));
594                }
595            }
596        }
597
598        Ok(tokens)
599    }
600
601    fn lex_number(&mut self) -> Result<Token, BopError> {
602        let mut s = String::new();
603        while let Some(ch) = self.peek() {
604            if ch.is_ascii_digit() {
605                s.push(ch);
606                self.advance();
607            } else {
608                break;
609            }
610        }
611        // A trailing `.<digit>` promotes to a float; `42..foo`
612        // or `42.` at EOF stays an Int so chained method calls
613        // (`42.str()`) and inclusive array-rest patterns still
614        // parse.
615        let is_float = if self.peek() == Some('.')
616            && self.peek_next().is_some_and(|c| c.is_ascii_digit())
617        {
618            s.push('.');
619            self.advance();
620            while let Some(ch) = self.peek() {
621                if ch.is_ascii_digit() {
622                    s.push(ch);
623                    self.advance();
624                } else {
625                    break;
626                }
627            }
628            true
629        } else {
630            false
631        };
632        if is_float {
633            let n: f64 = s
634                .parse()
635                .map_err(|_| self.error(format!("Invalid number: {}", s)))?;
636            Ok(Token::Number(n))
637        } else {
638            // Integer literal — try `i64`. Out-of-range values
639            // surface as a lex-time error rather than silently
640            // wrapping or degrading to `f64`, since that's the
641            // ergonomic opposite of "exact int arithmetic".
642            match s.parse::<i64>() {
643                Ok(n) => Ok(Token::Int(n)),
644                Err(_) => Err(self.error(format!(
645                    "Integer literal out of range for i64: {}",
646                    s
647                ))),
648            }
649        }
650    }
651
652    fn lex_ident_or_keyword(&mut self) -> Token {
653        let mut s = String::new();
654        while let Some(ch) = self.peek() {
655            if ch.is_ascii_alphanumeric() || ch == '_' {
656                s.push(ch);
657                self.advance();
658            } else {
659                break;
660            }
661        }
662        match s.as_str() {
663            "let" => Token::Let,
664            "const" => Token::Const,
665            "fn" => Token::Fn,
666            "return" => Token::Return,
667            "if" => Token::If,
668            "else" => Token::Else,
669            "while" => Token::While,
670            "for" => Token::For,
671            "in" => Token::In,
672            "repeat" => Token::Repeat,
673            "break" => Token::Break,
674            "continue" => Token::Continue,
675            "use" => Token::Use,
676            "as" => Token::As,
677            "struct" => Token::Struct,
678            "enum" => Token::Enum,
679            "match" => Token::Match,
680            "try" => Token::Try,
681            "true" => Token::True,
682            "false" => Token::False,
683            "none" => Token::None,
684            _ => Token::Ident(s),
685        }
686    }
687
688    fn lex_string(&mut self) -> Result<Token, BopError> {
689        self.advance(); // consume opening "
690        let mut parts: Vec<StringPart> = Vec::new();
691        let mut current = String::new();
692
693        loop {
694            match self.peek() {
695                None | Some('\n') => {
696                    return Err(self.error_with_hint(
697                        "This string is missing its closing `\"`",
698                        "Every string needs to start and end with quotes.",
699                    ));
700                }
701                Some('"') => {
702                    self.advance();
703                    break;
704                }
705                Some('\\') => {
706                    self.advance();
707                    match self.peek() {
708                        Some('"') => {
709                            current.push('"');
710                            self.advance();
711                        }
712                        Some('\\') => {
713                            current.push('\\');
714                            self.advance();
715                        }
716                        Some('n') => {
717                            current.push('\n');
718                            self.advance();
719                        }
720                        Some('t') => {
721                            current.push('\t');
722                            self.advance();
723                        }
724                        Some('r') => {
725                            // Windows / HTTP line endings; also
726                            // needed so `std.json` can stringify
727                            // and parse carriage returns inside
728                            // strings.
729                            current.push('\r');
730                            self.advance();
731                        }
732                        Some('{') => {
733                            current.push('{');
734                            self.advance();
735                        }
736                        Some('}') => {
737                            current.push('}');
738                            self.advance();
739                        }
740                        Some(c) => {
741                            return Err(self.error(format!("Unknown escape sequence `\\{}`", c)));
742                        }
743                        None => {
744                            return Err(self.error("Unexpected end of string after `\\`"));
745                        }
746                    }
747                }
748                Some('{')
749                    if self
750                        .peek_next()
751                        .is_some_and(|c| c.is_ascii_alphabetic() || c == '_') =>
752                {
753                    self.advance(); // consume {
754                    // Read variable name
755                    let mut var = String::new();
756                    while let Some(ch) = self.peek() {
757                        if ch.is_ascii_alphanumeric() || ch == '_' {
758                            var.push(ch);
759                            self.advance();
760                        } else {
761                            break;
762                        }
763                    }
764                    if self.peek() != Some('}') {
765                        return Err(self.error_with_hint(
766                            format!("Missing `}}` after `{{{}`", var),
767                            "String interpolation needs a closing `}`, like: \"{name}\"",
768                        ));
769                    }
770                    self.advance(); // consume }
771                    if !current.is_empty() {
772                        parts.push(StringPart::Literal(core::mem::take(&mut current)));
773                    }
774                    parts.push(StringPart::Variable(var));
775                }
776                Some(ch) => {
777                    current.push(ch);
778                    self.advance();
779                }
780            }
781        }
782
783        if parts.is_empty() {
784            // Plain string, no interpolation
785            Ok(Token::Str(current))
786        } else {
787            if !current.is_empty() {
788                parts.push(StringPart::Literal(current));
789            }
790            Ok(Token::StringInterp(parts))
791        }
792    }
793}
794
795#[cfg(test)]
796mod tests {
797    use super::*;
798
799    /// Lex and strip Eof, returning just token variants
800    fn toks(code: &str) -> Vec<Token> {
801        lex(code)
802            .unwrap()
803            .into_iter()
804            .map(|t| t.token)
805            .filter(|t| !matches!(t, Token::Eof))
806            .collect()
807    }
808
809    fn lex_err(code: &str) -> String {
810        lex(code).unwrap_err().message
811    }
812
813    // ─── Numbers ───────────────────────────────────────────────────
814
815    #[test]
816    fn integer() {
817        // Integer literals now lex to `Token::Int` (phase 6).
818        assert_eq!(toks("42"), vec![Token::Int(42)]);
819    }
820
821    #[test]
822    fn float() {
823        assert_eq!(toks("3.14"), vec![Token::Number(3.14)]);
824    }
825
826    #[test]
827    fn leading_zero_float() {
828        assert_eq!(toks("0.5"), vec![Token::Number(0.5)]);
829    }
830
831    // ─── Strings ───────────────────────────────────────────────────
832
833    #[test]
834    fn plain_string() {
835        assert_eq!(toks(r#""hello""#), vec![Token::Str("hello".into())]);
836    }
837
838    #[test]
839    fn escape_sequences() {
840        assert_eq!(
841            toks(r#""a\nb\t\\\"c""#),
842            vec![Token::Str("a\nb\t\\\"c".into())]
843        );
844    }
845
846    #[test]
847    fn escape_sequence_cr() {
848        // `\r` was added alongside `std.json` so carriage
849        // returns can live in string literals (Windows line
850        // endings, HTTP headers, etc.).
851        assert_eq!(
852            toks(r#""a\rb""#),
853            vec![Token::Str("a\rb".into())]
854        );
855    }
856
857    #[test]
858    fn string_interpolation() {
859        assert_eq!(
860            toks(r#""hi {name}!""#),
861            vec![Token::StringInterp(vec![
862                StringPart::Literal("hi ".into()),
863                StringPart::Variable("name".into()),
864                StringPart::Literal("!".into()),
865            ])]
866        );
867    }
868
869    #[test]
870    fn string_interpolation_multiple_vars() {
871        assert_eq!(
872            toks(r#""{x},{y}""#),
873            vec![Token::StringInterp(vec![
874                StringPart::Variable("x".into()),
875                StringPart::Literal(",".into()),
876                StringPart::Variable("y".into()),
877            ])]
878        );
879    }
880
881    #[test]
882    fn unterminated_string() {
883        assert!(lex_err(r#""hello"#).contains("missing its closing"));
884    }
885
886    #[test]
887    fn unknown_escape() {
888        assert!(lex_err(r#""hello\q""#).contains("Unknown escape"));
889    }
890
891    // ─── Keywords vs Identifiers ───────────────────────────────────
892
893    #[test]
894    fn keywords() {
895        assert_eq!(
896            toks("let fn return if else while for in repeat break continue true false none"),
897            vec![
898                Token::Let,
899                Token::Fn,
900                Token::Return,
901                Token::If,
902                Token::Else,
903                Token::While,
904                Token::For,
905                Token::In,
906                Token::Repeat,
907                Token::Break,
908                Token::Continue,
909                Token::True,
910                Token::False,
911                Token::None,
912            ]
913        );
914    }
915
916    #[test]
917    fn identifiers() {
918        assert_eq!(
919            toks("foo bar_baz _x abc123"),
920            vec![
921                Token::Ident("foo".into()),
922                Token::Ident("bar_baz".into()),
923                Token::Ident("_x".into()),
924                Token::Ident("abc123".into()),
925            ]
926        );
927    }
928
929    // ─── Operators ─────────────────────────────────────────────────
930
931    #[test]
932    fn single_char_ops() {
933        assert_eq!(
934            toks("+ - * / % = ! < > ( ) [ ] { } , : . ;"),
935            vec![
936                Token::Plus,
937                Token::Minus,
938                Token::Star,
939                Token::Slash,
940                Token::Percent,
941                Token::Eq,
942                Token::Bang,
943                Token::Lt,
944                Token::Gt,
945                Token::LParen,
946                Token::RParen,
947                Token::LBracket,
948                Token::RBracket,
949                Token::LBrace,
950                Token::RBrace,
951                Token::Comma,
952                Token::Colon,
953                Token::Dot,
954                Token::Semicolon,
955            ]
956        );
957    }
958
959    #[test]
960    fn double_char_ops() {
961        assert_eq!(
962            toks("== != <= >= && || += -= *= /= %="),
963            vec![
964                Token::EqEq,
965                Token::BangEq,
966                Token::LtEq,
967                Token::GtEq,
968                Token::AmpAmp,
969                Token::PipePipe,
970                Token::PlusEq,
971                Token::MinusEq,
972                Token::StarEq,
973                Token::SlashEq,
974                Token::PercentEq,
975            ]
976        );
977    }
978
979    #[test]
980    fn lone_ampersand_error() {
981        assert!(lex_err("&x").contains("Unexpected `&`"));
982    }
983
984    #[test]
985    fn lone_pipe_lexes_as_or_pattern_separator() {
986        // `|` is now the or-pattern separator inside `match`
987        // arms. It parses at the lexer level regardless of
988        // context; the parser decides whether it's accepted.
989        assert_eq!(
990            toks("|"),
991            vec![Token::Pipe]
992        );
993    }
994
995    // ─── Comments ──────────────────────────────────────────────────
996
997    #[test]
998    fn line_comment_skipped() {
999        assert_eq!(
1000            toks("1 // comment\n2"),
1001            vec![Token::Int(1), Token::Semicolon, Token::Int(2)]
1002        );
1003    }
1004
1005    #[test]
1006    fn comment_at_end() {
1007        assert_eq!(toks("x // done"), vec![Token::Ident("x".into())]);
1008    }
1009
1010    #[test]
1011    fn hash_is_not_a_comment() {
1012        // `#` used to be the line-comment token; comments are
1013        // now `//` and `#` is just an unrecognised character.
1014        assert!(lex_err("x # nope").contains("don't understand"));
1015    }
1016
1017    // ─── Auto-semicolons ──────────────────────────────────────────
1018
1019    #[test]
1020    fn auto_semi_after_ident() {
1021        assert_eq!(
1022            toks("x\ny"),
1023            vec![
1024                Token::Ident("x".into()),
1025                Token::Semicolon,
1026                Token::Ident("y".into()),
1027            ]
1028        );
1029    }
1030
1031    #[test]
1032    fn auto_semi_after_number() {
1033        assert_eq!(
1034            toks("42\n10"),
1035            vec![Token::Int(42), Token::Semicolon, Token::Int(10)]
1036        );
1037    }
1038
1039    #[test]
1040    fn auto_semi_after_rparen() {
1041        assert_eq!(
1042            toks("f()\ng()"),
1043            vec![
1044                Token::Ident("f".into()),
1045                Token::LParen,
1046                Token::RParen,
1047                Token::Semicolon,
1048                Token::Ident("g".into()),
1049                Token::LParen,
1050                Token::RParen,
1051            ]
1052        );
1053    }
1054
1055    #[test]
1056    fn auto_semi_after_rbrace() {
1057        assert_eq!(
1058            toks("{\n}\nx"),
1059            vec![
1060                Token::LBrace,
1061                Token::RBrace,
1062                Token::Semicolon,
1063                Token::Ident("x".into()),
1064            ]
1065        );
1066    }
1067
1068    #[test]
1069    fn no_semi_after_open_delim() {
1070        assert_eq!(toks("{\nx"), vec![Token::LBrace, Token::Ident("x".into()),]);
1071    }
1072
1073    #[test]
1074    fn no_semi_after_operator() {
1075        assert_eq!(
1076            toks("x +\ny"),
1077            vec![
1078                Token::Ident("x".into()),
1079                Token::Plus,
1080                Token::Ident("y".into()),
1081            ]
1082        );
1083    }
1084
1085    #[test]
1086    fn auto_semi_after_break_continue_return() {
1087        assert_eq!(
1088            toks("break\ncontinue\nreturn"),
1089            vec![
1090                Token::Break,
1091                Token::Semicolon,
1092                Token::Continue,
1093                Token::Semicolon,
1094                Token::Return,
1095            ]
1096        );
1097    }
1098
1099    #[test]
1100    fn auto_semi_after_true_false_none() {
1101        assert_eq!(
1102            toks("true\nfalse\nnone"),
1103            vec![
1104                Token::True,
1105                Token::Semicolon,
1106                Token::False,
1107                Token::Semicolon,
1108                Token::None,
1109            ]
1110        );
1111    }
1112
1113    // ─── Line tracking ─────────────────────────────────────────────
1114
1115    #[test]
1116    fn line_numbers() {
1117        let tokens = lex("x\ny\nz").unwrap();
1118        let lines: Vec<u32> = tokens.iter().map(|t| t.line).collect();
1119        // x(L1), ;(L1), y(L2), ;(L2), z(L3), Eof(L3)
1120        assert_eq!(lines, vec![1, 1, 2, 2, 3, 3]);
1121    }
1122
1123    // ─── Unknown character ─────────────────────────────────────────
1124
1125    #[test]
1126    fn unknown_char() {
1127        assert!(lex_err("@").contains("don't understand"));
1128    }
1129}