// leo_parser_rowan/lexer.rs
// Copyright (C) 2019-2026 Provable Inc.
// This file is part of the Leo library.

// The Leo library is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.

// The Leo library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License
// along with the Leo library. If not, see <https://www.gnu.org/licenses/>.

//! Lexer for the rowan-based Leo parser.
//!
//! This module provides a logos-based lexer that produces tokens suitable for
//! use with rowan's GreenNodeBuilder. All tokens, including whitespace and
//! comments (trivia), are explicitly represented.
use crate::{SyntaxKind, SyntaxKind::*};
use logos::Logos;
25
/// A token produced by the lexer.
///
/// Tokens carry only a kind and a byte length, not the text itself; because
/// the lexer is lossless, a token's text can be recovered by slicing the
/// source at a running byte offset (see `check_lex` in the tests).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Token {
    /// The kind of token.
    pub kind: SyntaxKind,
    /// The length in bytes of the token text.
    pub len: u32,
}
34
/// An error encountered during lexing.
///
/// Lexing never aborts: errors are collected alongside the token stream so
/// the parser can still run with error recovery (see [`lex`]).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LexError {
    /// The byte offset where the error occurred.
    pub offset: usize,
    /// A description of the error.
    pub message: String,
}
43
44/// Callback for parsing block comments.
45///
46/// Block comments can't be matched with a simple regex due to the need to find
47/// the closing `*/`. This also detects bidi override characters for security.
48fn comment_block(lex: &mut logos::Lexer<LogosToken>) -> bool {
49    let mut last_asterisk = false;
50    for (index, c) in lex.remainder().char_indices() {
51        if c == '*' {
52            last_asterisk = true;
53        } else if c == '/' && last_asterisk {
54            lex.bump(index + 1);
55            return true;
56        } else if matches!(c, '\u{202A}'..='\u{202E}' | '\u{2066}'..='\u{2069}') {
57            // Bidi character detected - end the comment token here
58            // so we can report that error separately.
59            lex.bump(index);
60            return true;
61        } else {
62            last_asterisk = false;
63        }
64    }
65    // Unterminated block comment - consume all remaining input
66    let remaining = lex.remainder().len();
67    lex.bump(remaining);
68    true
69}
70
71/// Internal logos token enum.
72///
73/// This is mapped to `SyntaxKind` during lexing. We use a separate enum here
74/// because logos requires ownership of the token type during lexing.
75#[derive(Clone, Copy, Debug, PartialEq, Eq, Logos)]
76#[logos(skip r"")] // Don't skip anything - we want all tokens for lossless parsing
77enum LogosToken {
78    // =========================================================================
79    // Trivia
80    // =========================================================================
81    #[regex(r"[ \t\f]+")]
82    Whitespace,
83
84    #[regex(r"\r?\n")]
85    Linebreak,
86
87    // Comments don't include line breaks or bidi characters
88    #[regex(r"//[^\r\n\u{202A}-\u{202E}\u{2066}-\u{2069}]*")]
89    CommentLine,
90
91    #[token(r"/*", comment_block)]
92    CommentBlock,
93
94    // =========================================================================
95    // Literals
96    // =========================================================================
97    // Address literals: aleo1...
98    // We lex any length and validate later
99    #[regex(r"aleo1[a-z0-9]*")]
100    AddressLiteral,
101
102    // Integer literals with various radixes
103    // The regex includes type suffixes (u8, i32, field, etc.) to lex as a single token.
104    // Hex/octal/binary literals use uppercase for hex digits to avoid ambiguity with suffixes.
105    #[regex(r"0x[0-9A-F_]+([ui](8|16|32|64|128)|field|group|scalar)?")]
106    #[regex(r"0o[0-7_]+([ui](8|16|32|64|128)|field|group|scalar)?")]
107    #[regex(r"0b[01_]+([ui](8|16|32|64|128)|field|group|scalar)?")]
108    #[regex(r"[0-9][0-9_]*([ui](8|16|32|64|128)|field|group|scalar)?")]
109    Integer,
110
111    #[regex(r#""[^"]*""#)]
112    StaticString,
113
114    // =========================================================================
115    // Identifiers and Keywords
116    // =========================================================================
117    // Note: Complex identifiers (paths like foo::bar, program IDs like foo.aleo,
118    // locators like foo.aleo/bar) are deferred to Phase 2. The lexer produces
119    // simple tokens; the parser handles disambiguation.
120    //
121    // We need special cases for `group::abc`, `signature::abc`, and `Future::abc`
122    // as otherwise these would be keywords followed by ::.
123    #[regex(r"group::[a-zA-Z][a-zA-Z0-9_]*")]
124    #[regex(r"signature::[a-zA-Z][a-zA-Z0-9_]*")]
125    #[regex(r"Future::[a-zA-Z][a-zA-Z0-9_]*")]
126    PathSpecial,
127
128    // Identifiers starting with underscore (intrinsic names)
129    #[regex(r"_[a-zA-Z][a-zA-Z0-9_]*")]
130    IdentIntrinsic,
131
132    // Regular identifiers (keywords are matched by checking the slice)
133    #[regex(r"[a-zA-Z][a-zA-Z0-9_]*")]
134    Ident,
135
136    // =========================================================================
137    // Operators (multi-character first for correct priority)
138    // =========================================================================
139    #[token("**=")]
140    PowAssign,
141    #[token("&&=")]
142    AndAssign,
143    #[token("||=")]
144    OrAssign,
145    #[token("<<=")]
146    ShlAssign,
147    #[token(">>=")]
148    ShrAssign,
149
150    #[token("**")]
151    Pow,
152    #[token("&&")]
153    And,
154    #[token("||")]
155    Or,
156    #[token("<<")]
157    Shl,
158    #[token(">>")]
159    Shr,
160    #[token("==")]
161    EqEq,
162    #[token("!=")]
163    NotEq,
164    #[token("<=")]
165    LtEq,
166    #[token(">=")]
167    GtEq,
168    #[token("+=")]
169    AddAssign,
170    #[token("-=")]
171    SubAssign,
172    #[token("*=")]
173    MulAssign,
174    #[token("/=")]
175    DivAssign,
176    #[token("%=")]
177    RemAssign,
178    #[token("&=")]
179    BitAndAssign,
180    #[token("|=")]
181    BitOrAssign,
182    #[token("^=")]
183    BitXorAssign,
184
185    #[token("->")]
186    Arrow,
187    #[token("=>")]
188    FatArrow,
189    #[token("..")]
190    DotDot,
191    #[token("::")]
192    ColonColon,
193
194    // Single character operators and punctuation
195    #[token("=")]
196    Eq,
197    #[token("!")]
198    Bang,
199    #[token("<")]
200    Lt,
201    #[token(">")]
202    Gt,
203    #[token("+")]
204    Plus,
205    #[token("-")]
206    Minus,
207    #[token("*")]
208    Star,
209    #[token("/")]
210    Slash,
211    #[token("%")]
212    Percent,
213    #[token("&")]
214    Amp,
215    #[token("|")]
216    Pipe,
217    #[token("^")]
218    Caret,
219
220    // =========================================================================
221    // Punctuation
222    // =========================================================================
223    #[token("(")]
224    LParen,
225    #[token(")")]
226    RParen,
227    #[token("[")]
228    LBracket,
229    #[token("]")]
230    RBracket,
231    #[token("{")]
232    LBrace,
233    #[token("}")]
234    RBrace,
235    #[token(",")]
236    Comma,
237    #[token(".")]
238    Dot,
239    #[token(";")]
240    Semicolon,
241    #[token(":")]
242    Colon,
243    #[token("?")]
244    Question,
245    #[token("_")]
246    Underscore,
247    #[token("@")]
248    At,
249
250    // =========================================================================
251    // Security
252    // =========================================================================
253    // Unicode bidirectional control characters are a security risk.
254    // We detect them so we can report an error.
255    #[regex(r"[\u{202A}-\u{202E}\u{2066}-\u{2069}]")]
256    Bidi,
257}
258
259/// Convert an identifier slice to the appropriate SyntaxKind (keyword or IDENT).
260fn ident_to_kind(s: &str) -> SyntaxKind {
261    match s {
262        // Literal keywords
263        "true" => KW_TRUE,
264        "false" => KW_FALSE,
265        "none" => KW_NONE,
266        // Type keywords
267        "address" => KW_ADDRESS,
268        "bool" => KW_BOOL,
269        "field" => KW_FIELD,
270        "group" => KW_GROUP,
271        "scalar" => KW_SCALAR,
272        "signature" => KW_SIGNATURE,
273        "string" => KW_STRING,
274        "record" => KW_RECORD,
275        "Future" => KW_FUTURE,
276        "i8" => KW_I8,
277        "i16" => KW_I16,
278        "i32" => KW_I32,
279        "i64" => KW_I64,
280        "i128" => KW_I128,
281        "u8" => KW_U8,
282        "u16" => KW_U16,
283        "u32" => KW_U32,
284        "u64" => KW_U64,
285        "u128" => KW_U128,
286        // Control flow keywords
287        "if" => KW_IF,
288        "else" => KW_ELSE,
289        "for" => KW_FOR,
290        "in" => KW_IN,
291        "return" => KW_RETURN,
292        // Declaration keywords
293        "let" => KW_LET,
294        "const" => KW_CONST,
295        "constant" => KW_CONSTANT,
296        "function" => KW_FUNCTION,
297        "transition" => KW_TRANSITION,
298        "inline" => KW_INLINE,
299        "async" => KW_ASYNC,
300        "Fn" => KW_FN,
301        "struct" => KW_STRUCT,
302        "constructor" => KW_CONSTRUCTOR,
303        // Program structure keywords
304        "program" => KW_PROGRAM,
305        "import" => KW_IMPORT,
306        "mapping" => KW_MAPPING,
307        "storage" => KW_STORAGE,
308        "network" => KW_NETWORK,
309        "aleo" => KW_ALEO,
310        "script" => KW_SCRIPT,
311        "block" => KW_BLOCK,
312        // Visibility & assertion keywords
313        "public" => KW_PUBLIC,
314        "private" => KW_PRIVATE,
315        "as" => KW_AS,
316        "self" => KW_SELF,
317        "assert" => KW_ASSERT,
318        "assert_eq" => KW_ASSERT_EQ,
319        "assert_neq" => KW_ASSERT_NEQ,
320        // Not a keyword
321        _ => IDENT,
322    }
323}
324
325/// Lex the given source text into a sequence of tokens.
326///
327/// Returns a vector of tokens and any errors encountered. Even if errors
328/// occur, tokens are still produced to enable error recovery in the parser.
329pub fn lex(source: &str) -> (Vec<Token>, Vec<LexError>) {
330    let mut tokens = Vec::new();
331    let mut errors = Vec::new();
332    let mut lexer = LogosToken::lexer(source);
333
334    while let Some(result) = lexer.next() {
335        let span = lexer.span();
336        let len = (span.end - span.start) as u32;
337        let slice = lexer.slice();
338
339        let kind = match result {
340            Ok(token) => match token {
341                // Trivia
342                LogosToken::Whitespace => WHITESPACE,
343                LogosToken::Linebreak => LINEBREAK,
344                LogosToken::CommentLine => COMMENT_LINE,
345                LogosToken::CommentBlock => COMMENT_BLOCK,
346
347                // Literals
348                LogosToken::AddressLiteral => ADDRESS_LIT,
349                LogosToken::Integer => INTEGER,
350                LogosToken::StaticString => STRING,
351
352                // Identifiers (check for keywords)
353                LogosToken::Ident => ident_to_kind(slice),
354                LogosToken::IdentIntrinsic => IDENT,
355                LogosToken::PathSpecial => IDENT, // Treat as IDENT for now (Phase 2)
356
357                // Multi-char operators
358                LogosToken::PowAssign => STAR2_EQ,
359                LogosToken::AndAssign => AMP2_EQ,
360                LogosToken::OrAssign => PIPE2_EQ,
361                LogosToken::ShlAssign => SHL_EQ,
362                LogosToken::ShrAssign => SHR_EQ,
363                LogosToken::Pow => STAR2,
364                LogosToken::And => AMP2,
365                LogosToken::Or => PIPE2,
366                LogosToken::Shl => SHL,
367                LogosToken::Shr => SHR,
368                LogosToken::EqEq => EQ2,
369                LogosToken::NotEq => BANG_EQ,
370                LogosToken::LtEq => LT_EQ,
371                LogosToken::GtEq => GT_EQ,
372                LogosToken::AddAssign => PLUS_EQ,
373                LogosToken::SubAssign => MINUS_EQ,
374                LogosToken::MulAssign => STAR_EQ,
375                LogosToken::DivAssign => SLASH_EQ,
376                LogosToken::RemAssign => PERCENT_EQ,
377                LogosToken::BitAndAssign => AMP_EQ,
378                LogosToken::BitOrAssign => PIPE_EQ,
379                LogosToken::BitXorAssign => CARET_EQ,
380                LogosToken::Arrow => ARROW,
381                LogosToken::FatArrow => FAT_ARROW,
382                LogosToken::DotDot => DOT_DOT,
383                LogosToken::ColonColon => COLON_COLON,
384
385                // Single-char operators
386                LogosToken::Eq => EQ,
387                LogosToken::Bang => BANG,
388                LogosToken::Lt => LT,
389                LogosToken::Gt => GT,
390                LogosToken::Plus => PLUS,
391                LogosToken::Minus => MINUS,
392                LogosToken::Star => STAR,
393                LogosToken::Slash => SLASH,
394                LogosToken::Percent => PERCENT,
395                LogosToken::Amp => AMP,
396                LogosToken::Pipe => PIPE,
397                LogosToken::Caret => CARET,
398
399                // Punctuation
400                LogosToken::LParen => L_PAREN,
401                LogosToken::RParen => R_PAREN,
402                LogosToken::LBracket => L_BRACKET,
403                LogosToken::RBracket => R_BRACKET,
404                LogosToken::LBrace => L_BRACE,
405                LogosToken::RBrace => R_BRACE,
406                LogosToken::Comma => COMMA,
407                LogosToken::Dot => DOT,
408                LogosToken::Semicolon => SEMICOLON,
409                LogosToken::Colon => COLON,
410                LogosToken::Question => QUESTION,
411                LogosToken::Underscore => UNDERSCORE,
412                LogosToken::At => AT,
413
414                // Security: bidi characters
415                LogosToken::Bidi => {
416                    errors.push(LexError {
417                        offset: span.start,
418                        message: "Unicode bidirectional override character detected".to_string(),
419                    });
420                    ERROR
421                }
422            },
423            Err(()) => {
424                errors.push(LexError { offset: span.start, message: format!("unexpected character: {:?}", slice) });
425                ERROR
426            }
427        };
428
429        tokens.push(Token { kind, len });
430    }
431
432    // Add EOF token
433    tokens.push(Token { kind: EOF, len: 0 });
434
435    (tokens, errors)
436}
437
#[cfg(test)]
mod tests {
    use super::*;
    use expect_test::{Expect, expect};

    /// Helper to format tokens for snapshot testing.
    ///
    /// Reconstructs each token's text by slicing the input at a running byte
    /// offset, which also exercises losslessness: the token lengths must tile
    /// the input exactly, or the slice below panics.
    fn check_lex(input: &str, expect: Expect) {
        let (tokens, _errors) = lex(input);
        let mut output = String::new();
        let mut offset = 0usize;
        for token in &tokens {
            let text = &input[offset..offset + token.len as usize];
            output.push_str(&format!("{:?} {:?}\n", token.kind, text));
            offset += token.len as usize;
        }
        expect.assert_eq(&output);
    }

    /// Helper to check that lexing produces expected errors.
    ///
    /// Errors are rendered one per line as `offset:message`.
    fn check_lex_errors(input: &str, expect: Expect) {
        let (_tokens, errors) = lex(input);
        let output = errors.iter().map(|e| format!("{}:{}", e.offset, e.message)).collect::<Vec<_>>().join("\n");
        expect.assert_eq(&output);
    }

    #[test]
    fn lex_empty() {
        check_lex("", expect![[r#"
            EOF ""
        "#]]);
    }

    #[test]
    fn lex_whitespace() {
        check_lex("  \t  ", expect![[r#"
            WHITESPACE "  \t  "
            EOF ""
        "#]]);
    }

    #[test]
    fn lex_linebreaks() {
        check_lex("\n\r\n\n", expect![[r#"
            LINEBREAK "\n"
            LINEBREAK "\r\n"
            LINEBREAK "\n"
            EOF ""
"#]]);
    }

    #[test]
    fn lex_mixed_whitespace() {
        check_lex("  \n  \t\n", expect![[r#"
            WHITESPACE "  "
            LINEBREAK "\n"
            WHITESPACE "  \t"
            LINEBREAK "\n"
            EOF ""
        "#]]);
    }

    #[test]
    fn lex_line_comments() {
        check_lex("// hello\n// world", expect![[r#"
            COMMENT_LINE "// hello"
            LINEBREAK "\n"
            COMMENT_LINE "// world"
            EOF ""
        "#]]);
    }

    #[test]
    fn lex_block_comments() {
        // Block comments may span lines; the linebreak stays inside the token.
        check_lex("/* hello */ /* multi\nline */", expect![[r#"
            COMMENT_BLOCK "/* hello */"
            WHITESPACE " "
            COMMENT_BLOCK "/* multi\nline */"
            EOF ""
        "#]]);
    }

    #[test]
    fn lex_identifiers() {
        check_lex("foo Bar _baz x123", expect![[r#"
            IDENT "foo"
            WHITESPACE " "
            IDENT "Bar"
            WHITESPACE " "
            IDENT "_baz"
            WHITESPACE " "
            IDENT "x123"
            EOF ""
        "#]]);
    }

    #[test]
    fn lex_keywords() {
        check_lex("let function if return true false", expect![[r#"
            KW_LET "let"
            WHITESPACE " "
            KW_FUNCTION "function"
            WHITESPACE " "
            KW_IF "if"
            WHITESPACE " "
            KW_RETURN "return"
            WHITESPACE " "
            KW_TRUE "true"
            WHITESPACE " "
            KW_FALSE "false"
            EOF ""
        "#]]);
    }

    #[test]
    fn lex_type_keywords() {
        check_lex("u8 u16 u32 u64 u128 i8 i16 i32 i64 i128", expect![[r#"
            KW_U8 "u8"
            WHITESPACE " "
            KW_U16 "u16"
            WHITESPACE " "
            KW_U32 "u32"
            WHITESPACE " "
            KW_U64 "u64"
            WHITESPACE " "
            KW_U128 "u128"
            WHITESPACE " "
            KW_I8 "i8"
            WHITESPACE " "
            KW_I16 "i16"
            WHITESPACE " "
            KW_I32 "i32"
            WHITESPACE " "
            KW_I64 "i64"
            WHITESPACE " "
            KW_I128 "i128"
            EOF ""
        "#]]);
    }

    #[test]
    fn lex_more_type_keywords() {
        check_lex("bool field group scalar address signature string record", expect![[r#"
            KW_BOOL "bool"
            WHITESPACE " "
            KW_FIELD "field"
            WHITESPACE " "
            KW_GROUP "group"
            WHITESPACE " "
            KW_SCALAR "scalar"
            WHITESPACE " "
            KW_ADDRESS "address"
            WHITESPACE " "
            KW_SIGNATURE "signature"
            WHITESPACE " "
            KW_STRING "string"
            WHITESPACE " "
            KW_RECORD "record"
            EOF ""
        "#]]);
    }

    #[test]
    fn lex_integers() {
        check_lex("123 0xFF 0b101 0o77", expect![[r#"
            INTEGER "123"
            WHITESPACE " "
            INTEGER "0xFF"
            WHITESPACE " "
            INTEGER "0b101"
            WHITESPACE " "
            INTEGER "0o77"
            EOF ""
        "#]]);
    }

    #[test]
    fn lex_integers_with_underscores() {
        check_lex("1_000_000 0xFF_FF", expect![[r#"
            INTEGER "1_000_000"
            WHITESPACE " "
            INTEGER "0xFF_FF"
            EOF ""
        "#]]);
    }

    #[test]
    fn lex_address_literal() {
        // The address regex wins over the identifier regex for `aleo1...`.
        check_lex("aleo1abc123", expect![[r#"
            ADDRESS_LIT "aleo1abc123"
            EOF ""
        "#]]);
    }

    #[test]
    fn lex_strings() {
        check_lex(r#""hello" "world""#, expect![[r#"
            STRING "\"hello\""
            WHITESPACE " "
            STRING "\"world\""
            EOF ""
        "#]]);
    }

    #[test]
    fn lex_punctuation() {
        check_lex("( ) [ ] { } , . ; : :: ? -> => _ @", expect![[r#"
            L_PAREN "("
            WHITESPACE " "
            R_PAREN ")"
            WHITESPACE " "
            L_BRACKET "["
            WHITESPACE " "
            R_BRACKET "]"
            WHITESPACE " "
            L_BRACE "{"
            WHITESPACE " "
            R_BRACE "}"
            WHITESPACE " "
            COMMA ","
            WHITESPACE " "
            DOT "."
            WHITESPACE " "
            SEMICOLON ";"
            WHITESPACE " "
            COLON ":"
            WHITESPACE " "
            COLON_COLON "::"
            WHITESPACE " "
            QUESTION "?"
            WHITESPACE " "
            ARROW "->"
            WHITESPACE " "
            FAT_ARROW "=>"
            WHITESPACE " "
            UNDERSCORE "_"
            WHITESPACE " "
            AT "@"
            EOF ""
        "#]]);
    }

    #[test]
    fn lex_arithmetic_operators() {
        check_lex("+ - * / % **", expect![[r#"
            PLUS "+"
            WHITESPACE " "
            MINUS "-"
            WHITESPACE " "
            STAR "*"
            WHITESPACE " "
            SLASH "/"
            WHITESPACE " "
            PERCENT "%"
            WHITESPACE " "
            STAR2 "**"
            EOF ""
        "#]]);
    }

    #[test]
    fn lex_comparison_operators() {
        check_lex("== != < <= > >=", expect![[r#"
            EQ2 "=="
            WHITESPACE " "
            BANG_EQ "!="
            WHITESPACE " "
            LT "<"
            WHITESPACE " "
            LT_EQ "<="
            WHITESPACE " "
            GT ">"
            WHITESPACE " "
            GT_EQ ">="
            EOF ""
        "#]]);
    }

    #[test]
    fn lex_logical_operators() {
        check_lex("&& || !", expect![[r#"
            AMP2 "&&"
            WHITESPACE " "
            PIPE2 "||"
            WHITESPACE " "
            BANG "!"
            EOF ""
        "#]]);
    }

    #[test]
    fn lex_bitwise_operators() {
        check_lex("& | ^ << >>", expect![[r#"
            AMP "&"
            WHITESPACE " "
            PIPE "|"
            WHITESPACE " "
            CARET "^"
            WHITESPACE " "
            SHL "<<"
            WHITESPACE " "
            SHR ">>"
            EOF ""
        "#]]);
    }

    #[test]
    fn lex_assignment_operators() {
        check_lex("= += -= *= /= %= **= &&= ||=", expect![[r#"
            EQ "="
            WHITESPACE " "
            PLUS_EQ "+="
            WHITESPACE " "
            MINUS_EQ "-="
            WHITESPACE " "
            STAR_EQ "*="
            WHITESPACE " "
            SLASH_EQ "/="
            WHITESPACE " "
            PERCENT_EQ "%="
            WHITESPACE " "
            STAR2_EQ "**="
            WHITESPACE " "
            AMP2_EQ "&&="
            WHITESPACE " "
            PIPE2_EQ "||="
            EOF ""
        "#]]);
    }

    #[test]
    fn lex_more_assignment_operators() {
        check_lex("&= |= ^= <<= >>=", expect![[r#"
            AMP_EQ "&="
            WHITESPACE " "
            PIPE_EQ "|="
            WHITESPACE " "
            CARET_EQ "^="
            WHITESPACE " "
            SHL_EQ "<<="
            WHITESPACE " "
            SHR_EQ ">>="
            EOF ""
        "#]]);
    }

    #[test]
    fn lex_dot_dot() {
        // `0..10` must not lex `0.` as anything; `..` is its own token.
        check_lex("0..10", expect![[r#"
            INTEGER "0"
            DOT_DOT ".."
            INTEGER "10"
            EOF ""
        "#]]);
    }

    #[test]
    fn lex_simple_expression() {
        check_lex("x + y * 2", expect![[r#"
            IDENT "x"
            WHITESPACE " "
            PLUS "+"
            WHITESPACE " "
            IDENT "y"
            WHITESPACE " "
            STAR "*"
            WHITESPACE " "
            INTEGER "2"
            EOF ""
        "#]]);
    }

    #[test]
    fn lex_function_call() {
        check_lex("foo(a, b)", expect![[r#"
            IDENT "foo"
            L_PAREN "("
            IDENT "a"
            COMMA ","
            WHITESPACE " "
            IDENT "b"
            R_PAREN ")"
            EOF ""
        "#]]);
    }

    #[test]
    fn lex_function_definition() {
        check_lex("function add(x: u32) -> u32 {", expect![[r#"
            KW_FUNCTION "function"
            WHITESPACE " "
            IDENT "add"
            L_PAREN "("
            IDENT "x"
            COLON ":"
            WHITESPACE " "
            KW_U32 "u32"
            R_PAREN ")"
            WHITESPACE " "
            ARROW "->"
            WHITESPACE " "
            KW_U32 "u32"
            WHITESPACE " "
            L_BRACE "{"
            EOF ""
        "#]]);
    }

    #[test]
    fn lex_let_statement() {
        check_lex("let x: u32 = 42;", expect![[r#"
            KW_LET "let"
            WHITESPACE " "
            IDENT "x"
            COLON ":"
            WHITESPACE " "
            KW_U32 "u32"
            WHITESPACE " "
            EQ "="
            WHITESPACE " "
            INTEGER "42"
            SEMICOLON ";"
            EOF ""
        "#]]);
    }

    #[test]
    fn lex_typed_integers() {
        // Integer literals with type suffixes should be lexed as single tokens
        check_lex("1000u32 42i64 0u8 255u128", expect![[r#"
            INTEGER "1000u32"
            WHITESPACE " "
            INTEGER "42i64"
            WHITESPACE " "
            INTEGER "0u8"
            WHITESPACE " "
            INTEGER "255u128"
            EOF ""
        "#]]);
    }

    #[test]
    fn lex_typed_integers_field() {
        // Field, group, and scalar suffixes
        check_lex("123field 456group 789scalar", expect![[r#"
            INTEGER "123field"
            WHITESPACE " "
            INTEGER "456group"
            WHITESPACE " "
            INTEGER "789scalar"
            EOF ""
        "#]]);
    }

    #[test]
    fn lex_special_paths() {
        // These are special cases where keywords are followed by ::
        check_lex("group::GEN signature::verify Future::await", expect![[r#"
            IDENT "group::GEN"
            WHITESPACE " "
            IDENT "signature::verify"
            WHITESPACE " "
            IDENT "Future::await"
            EOF ""
        "#]]);
    }

    #[test]
    fn lex_typed_integer_range() {
        // Integer with type suffix followed by range operator
        check_lex("0u8..STOP", expect![[r#"
            INTEGER "0u8"
            DOT_DOT ".."
            IDENT "STOP"
            EOF ""
        "#]]);
    }

    #[test]
    fn lex_error_unknown_char() {
        // `$` is not matched by any rule; the error carries its byte offset.
        check_lex_errors("hello $ world", expect![[r#"6:unexpected character: "$""#]]);
    }
}