husk_lang/
lexer.rs

1use std::fmt;
2
3use crate::span::Span;
4
/// A single lexical token: its kind plus the byte span it was read from.
#[derive(Debug, PartialEq, Clone)]
pub struct Token {
    /// What kind of token this is (keyword, literal, operator, ...).
    pub kind: TokenKind,
    /// Byte range of the token in the original source input.
    pub span: Span,
}
10
11impl Token {
12    #[allow(unused)]
13    pub fn new(kind: TokenKind, start: usize, end: usize) -> Token {
14        Token {
15            kind,
16            span: Span::new(start, end),
17        }
18    }
19}
20
/// Sentinel end-of-file token.
/// NOTE(review): its span is a fixed 0..0 placeholder, not the actual end
/// of any particular input — callers should not use it for positioning.
pub const EOF: Token = Token {
    kind: TokenKind::Eof,
    span: Span { start: 0, end: 0 },
};
25
/// Every kind of token the lexer can produce.
#[derive(Debug, PartialEq, Clone)]
pub enum TokenKind {
    // --- Keywords ---
    Function,
    Struct,
    Impl,
    Enum,
    Let,
    Use,
    Pub,
    Mod,
    Extern,
    Async,
    As,
    If,
    Else,
    Match,
    Loop,
    While,
    For,
    In,
    Break,
    Continue,
    Return,
    // --- Identifiers and literals (payload carries the lexed value) ---
    Identifier(String),
    Int(i64),
    Float(f64),
    String(String),
    Bool(bool),
    // --- Operators and punctuation ---
    Equals,
    DblEquals,
    Semicolon,
    LParen,
    RParen,
    LBrace,
    RBrace,
    LSquare,
    RSquare,
    Comma,
    Arrow,
    FatArrow,
    // Built-in type names (`int`, `float`, `bool`, `string`).
    Type(String),
    Plus,
    Minus,
    Asterisk,
    Slash,
    Percent,
    // Compound assignment operators (`+=`, `-=`, ...).
    PlusEquals,
    MinusEquals,
    StarEquals,
    SlashEquals,
    PercentEquals,
    // Comparison operators.
    LessThan,
    LessThanEquals,
    GreaterThan,
    GreaterThanEquals,
    BangEquals,
    // Logical operators.
    DblAmpersand,
    DblPipe,
    Pipe,
    Colon,
    DblColon,
    Dot,
    // Range operators (`..` exclusive, `..=` inclusive).
    DblDot,
    DblDotEquals,
    Underscore,
    Bang,
    Question,
    Hash,
    // Lexing error; payload is a human-readable message.
    Error(String),
    // End of input.
    Eof,
}
97
98impl TokenKind {
99    pub fn is_identifier(&self) -> bool {
100        matches!(self, TokenKind::Identifier(_))
101    }
102}
103
impl fmt::Display for TokenKind {
    /// Renders the token as human-readable text: punctuation as its source
    /// spelling (e.g. `==`), keywords as their capitalized variant name
    /// (e.g. `Function`), and payload-carrying variants as their value.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let s = match self {
            TokenKind::Function => "Function",
            TokenKind::Struct => "Struct",
            TokenKind::Impl => "Impl",
            TokenKind::Enum => "Enum",
            TokenKind::Let => "Let",
            TokenKind::Use => "Use",
            TokenKind::Pub => "Pub",
            TokenKind::Mod => "Mod",
            TokenKind::Extern => "Extern",
            TokenKind::Async => "Async",
            TokenKind::As => "As",
            TokenKind::If => "If",
            TokenKind::Else => "Else",
            TokenKind::Match => "Match",
            TokenKind::Loop => "Loop",
            TokenKind::While => "While",
            TokenKind::For => "For",
            TokenKind::In => "In",
            TokenKind::Break => "Break",
            TokenKind::Continue => "Continue",
            TokenKind::Return => "Return",
            TokenKind::Identifier(s) => s,
            // Non-&str payloads can't flow through `s`; write and return early.
            TokenKind::Int(i) => return write!(f, "{}", i),
            TokenKind::Float(fl) => return write!(f, "{}", fl),
            TokenKind::String(s) => s,
            TokenKind::Bool(b) => return write!(f, "{}", b),
            TokenKind::Equals => "=",
            TokenKind::DblEquals => "==",
            TokenKind::Semicolon => ";",
            TokenKind::LParen => "(",
            TokenKind::RParen => ")",
            TokenKind::LBrace => "{",
            TokenKind::RBrace => "}",
            TokenKind::LSquare => "[",
            TokenKind::RSquare => "]",
            TokenKind::Comma => ",",
            TokenKind::Arrow => "->",
            TokenKind::FatArrow => "=>",
            TokenKind::Type(s) => s,
            TokenKind::Plus => "+",
            TokenKind::Minus => "-",
            TokenKind::Asterisk => "*",
            TokenKind::Slash => "/",
            TokenKind::Percent => "%",
            TokenKind::PlusEquals => "+=",
            TokenKind::MinusEquals => "-=",
            TokenKind::StarEquals => "*=",
            TokenKind::SlashEquals => "/=",
            TokenKind::PercentEquals => "%=",
            TokenKind::LessThan => "<",
            TokenKind::LessThanEquals => "<=",
            TokenKind::GreaterThan => ">",
            TokenKind::GreaterThanEquals => ">=",
            TokenKind::BangEquals => "!=",
            TokenKind::DblAmpersand => "&&",
            TokenKind::DblPipe => "||",
            TokenKind::Pipe => "|",
            TokenKind::Colon => ":",
            TokenKind::DblColon => "::",
            TokenKind::Dot => ".",
            TokenKind::DblDot => "..",
            TokenKind::DblDotEquals => "..=",
            TokenKind::Underscore => "_",
            TokenKind::Bang => "!",
            TokenKind::Question => "?",
            TokenKind::Hash => "#",
            TokenKind::Error(s) => s,
            TokenKind::Eof => "EOF",
        };
        write!(f, "{}", s)
    }
}
179
/// Hand-written, UTF-8-aware lexer that walks the input and produces
/// `Token`s on demand via `next_token`. All positions are byte offsets.
#[derive(Debug)]
pub struct Lexer {
    input: String,
    position: usize,       // current byte position in input
    read_position: usize,  // next byte position to read
    ch: Option<char>,      // current character
    start_position: usize, // byte position where current token started
}
188
189impl Lexer {
190    pub fn new(input: impl Into<String>) -> Lexer {
191        let input = input.into();
192        let mut l = Lexer {
193            input,
194            position: 0,
195            read_position: 0,
196            ch: None,
197            start_position: 0,
198        };
199        l.read_char();
200        l
201    }
202
203    pub fn lex_all(&mut self) -> Vec<Token> {
204        let mut tokens = Vec::new();
205        loop {
206            let token = self.next_token();
207            if token.kind == TokenKind::Eof {
208                break;
209            }
210            tokens.push(token);
211        }
212        tokens
213    }
214
215    fn read_char(&mut self) {
216        self.position = self.read_position;
217
218        if self.read_position >= self.input.len() {
219            self.ch = None;
220            return;
221        }
222
223        // Get the character at the current byte position
224        let mut chars = self.input[self.read_position..].chars();
225        if let Some(ch) = chars.next() {
226            self.ch = Some(ch);
227            // Advance read_position by the byte length of this character
228            self.read_position += ch.len_utf8();
229        } else {
230            self.ch = None;
231        }
232    }
233
234    fn peek_char(&self) -> Option<char> {
235        if self.read_position >= self.input.len() {
236            None
237        } else {
238            self.input[self.read_position..].chars().next()
239        }
240    }
241
242    fn skip_whitespace(&mut self) {
243        while let Some(c) = self.ch {
244            if c.is_whitespace() {
245                self.read_char();
246            } else {
247                break;
248            }
249        }
250    }
251
252    pub fn next_token(&mut self) -> Token {
253        // Token generation logic with line and column information
254        self.skip_whitespace();
255        self.start_position = self.position;
256        let token = match self.ch {
257            Some('=') => {
258                if self.peek_char() == Some('=') {
259                    self.read_char();
260                    self.create_token(TokenKind::DblEquals)
261                } else if self.peek_char() == Some('>') {
262                    self.read_char();
263                    self.create_token(TokenKind::FatArrow)
264                } else {
265                    self.create_token(TokenKind::Equals)
266                }
267            }
268            Some(';') => self.create_token(TokenKind::Semicolon),
269            Some('(') => self.create_token(TokenKind::LParen),
270            Some(')') => self.create_token(TokenKind::RParen),
271            Some('{') => self.create_token(TokenKind::LBrace),
272            Some('}') => self.create_token(TokenKind::RBrace),
273            Some('[') => self.create_token(TokenKind::LSquare),
274            Some(']') => self.create_token(TokenKind::RSquare),
275            Some('>') => {
276                if self.peek_char() == Some('=') {
277                    self.read_char();
278                    self.create_token(TokenKind::GreaterThanEquals)
279                } else {
280                    self.create_token(TokenKind::GreaterThan)
281                }
282            }
283            Some('<') => {
284                if self.peek_char() == Some('=') {
285                    self.read_char();
286                    self.create_token(TokenKind::LessThanEquals)
287                } else {
288                    self.create_token(TokenKind::LessThan)
289                }
290            }
291            Some(',') => self.create_token(TokenKind::Comma),
292            Some('+') => {
293                if self.peek_char() == Some('=') {
294                    self.read_char();
295                    self.create_token(TokenKind::PlusEquals)
296                } else {
297                    self.create_token(TokenKind::Plus)
298                }
299            }
300            Some('-') => {
301                if self.peek_char() == Some('=') {
302                    self.read_char();
303                    self.create_token(TokenKind::MinusEquals)
304                } else if self.peek_char() == Some('>') {
305                    self.read_char();
306                    self.create_token(TokenKind::Arrow)
307                } else {
308                    self.create_token(TokenKind::Minus)
309                }
310            }
311            Some('*') => {
312                if self.peek_char() == Some('=') {
313                    self.read_char();
314                    self.create_token(TokenKind::StarEquals)
315                } else {
316                    self.create_token(TokenKind::Asterisk)
317                }
318            }
319            Some(':') => {
320                if self.peek_char() == Some(':') {
321                    self.read_char();
322                    self.create_token(TokenKind::DblColon)
323                } else {
324                    self.create_token(TokenKind::Colon)
325                }
326            }
327            Some('/') => {
328                if self.peek_char() == Some('/') {
329                    while let Some(c) = self.ch {
330                        if c == '\n' {
331                            break;
332                        }
333                        self.read_char();
334                    }
335                    return self.next_token();
336                } else if self.peek_char() == Some('*') {
337                    while let Some(c) = self.ch {
338                        if c == '*' && self.peek_char() == Some('/') {
339                            self.read_char();
340                            self.read_char();
341                            break;
342                        }
343                        self.read_char();
344                    }
345                    return self.next_token();
346                } else if self.peek_char() == Some('=') {
347                    self.read_char();
348                    self.create_token(TokenKind::SlashEquals)
349                } else {
350                    self.create_token(TokenKind::Slash)
351                }
352            }
353            Some('%') => {
354                if self.peek_char() == Some('=') {
355                    self.read_char();
356                    self.create_token(TokenKind::PercentEquals)
357                } else {
358                    self.create_token(TokenKind::Percent)
359                }
360            }
361            Some('.') => {
362                let start = self.position;
363                if self.peek_char() == Some('.') {
364                    self.read_char();
365
366                    if self.peek_char() == Some('=') {
367                        self.read_char();
368                        self.read_char();
369                        return Token::new(TokenKind::DblDotEquals, start, self.position);
370                    }
371
372                    self.read_char();
373                    return Token::new(TokenKind::DblDot, start, self.position);
374                } else {
375                    self.create_token(TokenKind::Dot)
376                }
377            }
378            Some('"') => self.read_string(),
379            Some('_') => {
380                if self.is_identifier_start(self.peek_char()) {
381                    self.read_identifier_or_type()
382                } else {
383                    self.create_token(TokenKind::Underscore)
384                }
385            }
386            Some('!') => {
387                if self.peek_char() == Some('=') {
388                    self.read_char();
389                    self.create_token(TokenKind::BangEquals)
390                } else {
391                    self.create_token(TokenKind::Bang)
392                }
393            }
394            Some('&') => {
395                if self.peek_char() == Some('&') {
396                    self.read_char();
397                    self.create_token(TokenKind::DblAmpersand)
398                } else {
399                    self.create_token(TokenKind::Error("Unexpected character: &".to_string()))
400                }
401            }
402            Some('|') => {
403                if self.peek_char() == Some('|') {
404                    self.read_char();
405                    self.create_token(TokenKind::DblPipe)
406                } else {
407                    self.create_token(TokenKind::Pipe)
408                }
409            }
410            Some('?') => self.create_token(TokenKind::Question),
411            Some('#') => self.create_token(TokenKind::Hash),
412            Some(c) => {
413                if c.is_alphabetic() {
414                    let token = self.read_identifier_or_type();
415                    return token;
416                } else if c.is_ascii_digit() {
417                    return self.read_number();
418                } else {
419                    self.create_token(TokenKind::Error(format!("Unexpected character: {}", c)))
420                }
421            }
422            None => self.create_token_no_advance(TokenKind::Eof),
423        };
424        self.read_char();
425        token
426    }
427
428    fn is_identifier_start(&self, c: Option<char>) -> bool {
429        c.is_some_and(|c| c.is_alphabetic() || c == '_')
430    }
431
432    fn is_identifier_char(&self, c: char) -> bool {
433        c.is_alphanumeric() || c == '_'
434    }
435
436    fn create_token(&self, kind: TokenKind) -> Token {
437        Token {
438            kind,
439            span: Span::new(self.start_position, self.read_position),
440        }
441    }
442
443    fn create_token_no_advance(&self, kind: TokenKind) -> Token {
444        Token {
445            kind,
446            span: Span::new(self.start_position, self.position),
447        }
448    }
449
450    fn read_identifier_or_type(&mut self) -> Token {
451        let start_position = self.position;
452        while let Some(c) = self.ch {
453            if self.is_identifier_char(c) {
454                self.read_char();
455            } else {
456                break;
457            }
458        }
459        let mut identifier: String = self.input[start_position..self.position].to_string();
460
461        // Special handling for format! - check if identifier is "format" followed by "!"
462        if identifier == "format" && self.ch == Some('!') {
463            self.read_char(); // consume the '!'
464            identifier.push('!');
465        }
466
467        let kind = match identifier.as_str() {
468            "struct" => TokenKind::Struct,
469            "impl" => TokenKind::Impl,
470            "enum" => TokenKind::Enum,
471            "false" => TokenKind::Bool(false),
472            "true" => TokenKind::Bool(true),
473            "fn" => TokenKind::Function,
474            "let" => TokenKind::Let,
475            "use" => TokenKind::Use,
476            "pub" => TokenKind::Pub,
477            "mod" => TokenKind::Mod,
478            "extern" => TokenKind::Extern,
479            "async" => TokenKind::Async,
480            "as" => TokenKind::As,
481            "if" => TokenKind::If,
482            "else" => TokenKind::Else,
483            "match" => TokenKind::Match,
484            "loop" => TokenKind::Loop,
485            "while" => TokenKind::While,
486            "for" => TokenKind::For,
487            "in" => TokenKind::In,
488            "break" => TokenKind::Break,
489            "continue" => TokenKind::Continue,
490            "return" => TokenKind::Return,
491            "int" | "float" | "bool" | "string" => TokenKind::Type(identifier),
492            "_" => TokenKind::Underscore,
493            _ => TokenKind::Identifier(identifier),
494        };
495
496        self.create_token_no_advance(kind)
497    }
498
499    fn read_string(&mut self) -> Token {
500        self.read_char(); // Skip opening quote
501        let mut string = String::new();
502
503        while let Some(c) = self.ch {
504            if c == '"' {
505                break;
506            } else if c == '\\' {
507                // Handle escape sequences
508                self.read_char(); // Consume backslash
509                match self.ch {
510                    Some('n') => string.push('\n'),
511                    Some('t') => string.push('\t'),
512                    Some('r') => string.push('\r'),
513                    Some('\\') => string.push('\\'),
514                    Some('"') => string.push('"'),
515                    Some('\'') => string.push('\''),
516                    Some('0') => string.push('\0'),
517                    Some(ch) => {
518                        // Unknown escape sequence, just include as-is
519                        string.push('\\');
520                        string.push(ch);
521                    }
522                    None => {
523                        // String ended with backslash
524                        string.push('\\');
525                        break;
526                    }
527                }
528            } else {
529                string.push(c);
530            }
531            self.read_char();
532        }
533
534        self.create_token(TokenKind::String(string))
535    }
536
537    fn read_number(&mut self) -> Token {
538        let position = self.position;
539        while let Some(c) = self.ch {
540            if c == '.' && self.peek_char() == Some('.') {
541                break;
542            }
543            if !c.is_ascii_digit() && (c != '.') {
544                break;
545            }
546            self.read_char();
547        }
548
549        let str = self.input[position..self.position].to_string();
550
551        if str.contains('.') {
552            let num = str.parse().unwrap();
553            return self.create_token_no_advance(TokenKind::Float(num));
554        }
555
556        let num = str.parse().unwrap();
557        self.create_token_no_advance(TokenKind::Int(num))
558    }
559}
560
561#[cfg(test)]
562mod tests {
563    use super::*;
564
565    #[test]
566    fn test_float() {
567        let input = "let x = 10.5;";
568        let mut lexer = Lexer::new(input.to_string());
569        let expected_tokens = vec![
570            TokenKind::Let,
571            TokenKind::Identifier("x".to_string()),
572            TokenKind::Equals,
573            TokenKind::Float(10.5),
574            TokenKind::Semicolon,
575        ];
576
577        for expected in expected_tokens {
578            let token = lexer.next_token();
579            assert_eq!(token.kind, expected);
580        }
581    }
582
583    #[test]
584    fn test_next_token() {
585        let input = r#"
586            let five = 5;
587            fn add(x, y) {
588                x + y;
589            }
590        "#;
591        let mut lexer = Lexer::new(input.to_string());
592
593        let expected_tokens = vec![
594            TokenKind::Let,
595            TokenKind::Identifier("five".to_string()),
596            TokenKind::Equals,
597            TokenKind::Int(5),
598            TokenKind::Semicolon,
599            TokenKind::Function,
600            TokenKind::Identifier("add".to_string()),
601            TokenKind::LParen,
602            TokenKind::Identifier("x".to_string()),
603            TokenKind::Comma,
604            TokenKind::Identifier("y".to_string()),
605            TokenKind::RParen,
606            TokenKind::LBrace,
607            TokenKind::Identifier("x".to_string()),
608            TokenKind::Plus,
609            TokenKind::Identifier("y".to_string()),
610            TokenKind::Semicolon,
611            TokenKind::RBrace,
612            TokenKind::Eof,
613        ];
614
615        for expected in expected_tokens {
616            let token = lexer.next_token();
617            assert_eq!(token.kind, expected);
618        }
619    }
620
621    #[test]
622    fn test_lex_let_int() {
623        let input = r#"let age = 1;"#;
624        let mut lexer = Lexer::new(input.to_string());
625        let expected_tokens = vec![
626            Token::new(TokenKind::Let, 0, 3),
627            Token::new(TokenKind::Identifier("age".to_string()), 4, 7),
628            Token::new(TokenKind::Equals, 8, 9),
629            Token::new(TokenKind::Int(1), 10, 11),
630            Token::new(TokenKind::Semicolon, 11, 12),
631            Token::new(TokenKind::Eof, 12, 12),
632        ];
633
634        for expected in expected_tokens {
635            let token = lexer.next_token();
636            assert_eq!(token, expected);
637        }
638    }
639
640    #[test]
641    fn test_lex_let_string() {
642        let input = r#"let name = "Felipe";"#;
643        let mut lexer = Lexer::new(input.to_string());
644        let expected_tokens = vec![
645            Token::new(TokenKind::Let, 0, 3),
646            Token::new(TokenKind::Identifier("name".to_string()), 4, 8),
647            Token::new(TokenKind::Equals, 9, 10),
648            Token::new(TokenKind::String("Felipe".to_string()), 11, 19),
649            Token::new(TokenKind::Semicolon, 19, 20),
650            Token::new(TokenKind::Eof, 20, 20),
651        ];
652
653        for expected in expected_tokens {
654            let token = lexer.next_token();
655            assert_eq!(token, expected);
656        }
657    }
658
659    #[test]
660    fn test_lex_let_bool() {
661        let input = r#"let is_true = true;"#;
662        let mut lexer = Lexer::new(input.to_string());
663        let expected_tokens = vec![
664            Token::new(TokenKind::Let, 0, 3),
665            Token::new(TokenKind::Identifier("is_true".to_string()), 4, 11),
666            Token::new(TokenKind::Equals, 12, 13),
667            Token::new(TokenKind::Bool(true), 14, 18),
668            Token::new(TokenKind::Semicolon, 18, 19),
669            Token::new(TokenKind::Eof, 19, 19),
670        ];
671
672        for expected in expected_tokens {
673            let token = lexer.next_token();
674            assert_eq!(token, expected);
675        }
676    }
677
678    #[test]
679    fn test_lex_let_string_newline() {
680        let input = r#"
681            let name = "Felipe";
682        "#;
683        let mut lexer = Lexer::new(input.to_string());
684        let expected_tokens = vec![
685            Token::new(TokenKind::Let, 13, 16),
686            Token::new(TokenKind::Identifier("name".to_string()), 17, 21),
687            Token::new(TokenKind::Equals, 22, 23),
688            Token::new(TokenKind::String("Felipe".to_string()), 24, 32),
689            Token::new(TokenKind::Semicolon, 32, 33),
690            Token::new(TokenKind::Eof, 42, 42),
691        ];
692
693        for expected in expected_tokens {
694            let token = lexer.next_token();
695            assert_eq!(token, expected);
696        }
697    }
698
699    #[test]
700    fn test_lex_if() {
701        let input = r#"
702            if true {
703                let x = 10;
704            }
705        "#;
706
707        let mut lexer = Lexer::new(input.to_string());
708        let expected_tokens = vec![
709            Token::new(TokenKind::If, 13, 15),
710            Token::new(TokenKind::Bool(true), 16, 20),
711            Token::new(TokenKind::LBrace, 21, 22),
712            Token::new(TokenKind::Let, 39, 42),
713            Token::new(TokenKind::Identifier("x".to_string()), 43, 44),
714            Token::new(TokenKind::Equals, 45, 46),
715            Token::new(TokenKind::Int(10), 47, 49),
716            Token::new(TokenKind::Semicolon, 49, 50),
717            Token::new(TokenKind::RBrace, 63, 64),
718            Token::new(TokenKind::Eof, 73, 73),
719        ];
720
721        for expected in expected_tokens {
722            let token = lexer.next_token();
723            assert_eq!(token, expected);
724        }
725    }
726
727    #[test]
728    fn test_lex_if_else() {
729        let input = r#"
730            if true {
731                let x = 10;
732            } else {
733                let x = 20;
734            }
735        "#;
736
737        let mut lexer = Lexer::new(input.to_string());
738        let expected_tokens = vec![
739            Token::new(TokenKind::If, 13, 15),
740            Token::new(TokenKind::Bool(true), 16, 20),
741            Token::new(TokenKind::LBrace, 21, 22),
742            Token::new(TokenKind::Let, 39, 42),
743            Token::new(TokenKind::Identifier("x".to_string()), 43, 44),
744            Token::new(TokenKind::Equals, 45, 46),
745            Token::new(TokenKind::Int(10), 47, 49),
746            Token::new(TokenKind::Semicolon, 49, 50),
747            Token::new(TokenKind::RBrace, 63, 64),
748            Token::new(TokenKind::Else, 65, 69),
749            Token::new(TokenKind::LBrace, 70, 71),
750            Token::new(TokenKind::Let, 88, 91),
751            Token::new(TokenKind::Identifier("x".to_string()), 92, 93),
752            Token::new(TokenKind::Equals, 94, 95),
753            Token::new(TokenKind::Int(20), 96, 98),
754            Token::new(TokenKind::Semicolon, 98, 99),
755            Token::new(TokenKind::RBrace, 112, 113),
756            Token::new(TokenKind::Eof, 122, 122),
757        ];
758
759        for expected in expected_tokens {
760            let token = lexer.next_token();
761            assert_eq!(token, expected);
762        }
763    }
764
765    #[test]
766    fn test_lex_struct() {
767        let input = r#"
768            struct Person {
769                name: string,
770                age: int,
771            }
772        "#;
773
774        let mut lexer = Lexer::new(input.to_string());
775        let expected_tokens = vec![
776            Token::new(TokenKind::Struct, 13, 19),
777            Token::new(TokenKind::Identifier("Person".to_string()), 20, 26),
778            Token::new(TokenKind::LBrace, 27, 28),
779            Token::new(TokenKind::Identifier("name".to_string()), 45, 49),
780            Token::new(TokenKind::Colon, 49, 50),
781            Token::new(TokenKind::Type("string".to_string()), 51, 57),
782            Token::new(TokenKind::Comma, 57, 58),
783            Token::new(TokenKind::Identifier("age".to_string()), 75, 78),
784            Token::new(TokenKind::Colon, 78, 79),
785            Token::new(TokenKind::Type("int".to_string()), 80, 83),
786            Token::new(TokenKind::Comma, 83, 84),
787            Token::new(TokenKind::RBrace, 97, 98),
788            Token::new(TokenKind::Eof, 107, 107),
789        ];
790
791        for expected in expected_tokens {
792            let token = lexer.next_token();
793            assert_eq!(token, expected);
794        }
795    }
796
797    #[test]
798    fn test_lex_struct_instance() {
799        let input = r#"
800            struct Person {
801                name: string,
802                age: int,
803            }
804
805            let p = Person {
806                name: "Felipe",
807                age: 30,
808            };
809        "#;
810
811        let mut lexer = Lexer::new(input.to_string());
812        let expected_tokens = vec![
813            Token::new(TokenKind::Struct, 13, 19),
814            Token::new(TokenKind::Identifier("Person".to_string()), 20, 26),
815            Token::new(TokenKind::LBrace, 27, 28),
816            Token::new(TokenKind::Identifier("name".to_string()), 45, 49),
817            Token::new(TokenKind::Colon, 49, 50),
818            Token::new(TokenKind::Type("string".to_string()), 51, 57),
819            Token::new(TokenKind::Comma, 57, 58),
820            Token::new(TokenKind::Identifier("age".to_string()), 75, 78),
821            Token::new(TokenKind::Colon, 78, 79),
822            Token::new(TokenKind::Type("int".to_string()), 80, 83),
823            Token::new(TokenKind::Comma, 83, 84),
824            Token::new(TokenKind::RBrace, 97, 98),
825            Token::new(TokenKind::Let, 112, 115),
826            Token::new(TokenKind::Identifier("p".to_string()), 116, 117),
827            Token::new(TokenKind::Equals, 118, 119),
828            Token::new(TokenKind::Identifier("Person".to_string()), 120, 126),
829            Token::new(TokenKind::LBrace, 127, 128),
830            Token::new(TokenKind::Identifier("name".to_string()), 145, 149),
831            Token::new(TokenKind::Colon, 149, 150),
832            Token::new(TokenKind::String("Felipe".to_string()), 151, 159),
833            Token::new(TokenKind::Comma, 159, 160),
834            Token::new(TokenKind::Identifier("age".to_string()), 177, 180),
835            Token::new(TokenKind::Colon, 180, 181),
836            Token::new(TokenKind::Int(30), 182, 184),
837            Token::new(TokenKind::Comma, 184, 185),
838            Token::new(TokenKind::RBrace, 198, 199),
839            Token::new(TokenKind::Semicolon, 199, 200),
840            Token::new(TokenKind::Eof, 209, 209),
841        ];
842
843        for expected in expected_tokens {
844            let token = lexer.next_token();
845            assert_eq!(token, expected);
846        }
847    }
848
849    #[test]
850    fn test_lex_struct_field_access() {
851        let input = "p.name";
852
853        let mut lexer = Lexer::new(input.to_string());
854        let expected_tokens = vec![
855            Token::new(TokenKind::Identifier("p".to_string()), 0, 1),
856            Token::new(TokenKind::Dot, 1, 2),
857            Token::new(TokenKind::Identifier("name".to_string()), 2, 6),
858            Token::new(TokenKind::Eof, 6, 6),
859        ];
860
861        for expected in expected_tokens {
862            let token = lexer.next_token();
863            assert_eq!(token, expected);
864        }
865    }
866
867    #[test]
868    fn test_lex_struct_field_set() {
869        let input = "client.age = 12;";
870
871        let mut lexer = Lexer::new(input.to_string());
872        let expected_tokens = vec![
873            Token::new(TokenKind::Identifier("client".to_string()), 0, 6),
874            Token::new(TokenKind::Dot, 6, 7),
875            Token::new(TokenKind::Identifier("age".to_string()), 7, 10),
876            Token::new(TokenKind::Equals, 11, 12),
877            Token::new(TokenKind::Int(12), 13, 15),
878            Token::new(TokenKind::Semicolon, 15, 16),
879            Token::new(TokenKind::Eof, 16, 16),
880        ];
881
882        for expected in expected_tokens {
883            let token = lexer.next_token();
884            assert_eq!(token, expected);
885        }
886    }
887
888    #[test]
889    fn test_lex_eq() {
890        let input = "x == 10;";
891
892        let mut lexer = Lexer::new(input.to_string());
893        let expected_tokens = vec![
894            Token::new(TokenKind::Identifier("x".to_string()), 0, 1),
895            Token::new(TokenKind::DblEquals, 2, 4),
896            Token::new(TokenKind::Int(10), 5, 7),
897            Token::new(TokenKind::Semicolon, 7, 8),
898            Token::new(TokenKind::Eof, 8, 8),
899        ];
900
901        for expected in expected_tokens {
902            let token = lexer.next_token();
903            assert_eq!(token, expected);
904        }
905    }
906
907    #[test]
908    fn test_lex_enum() {
909        let input = r#"
910            enum Option {
911                Some(String),
912                None,
913            }
914        "#;
915
916        let mut lexer = Lexer::new(input.to_string());
917        let expected_tokens = vec![
918            TokenKind::Enum,
919            TokenKind::Identifier("Option".to_string()),
920            TokenKind::LBrace,
921            TokenKind::Identifier("Some".to_string()),
922            TokenKind::LParen,
923            TokenKind::Identifier("String".to_string()),
924            TokenKind::RParen,
925            TokenKind::Comma,
926            TokenKind::Identifier("None".to_string()),
927            TokenKind::Comma,
928            TokenKind::RBrace,
929            TokenKind::Eof,
930        ];
931
932        for expected in expected_tokens {
933            let kind = lexer.next_token().kind;
934            assert_eq!(kind, expected);
935        }
936    }
937
938    #[test]
939    fn test_lex_enum_variant() {
940        let input = r"let c = Color::Red;";
941
942        let mut lexer = Lexer::new(input.to_string());
943        let expected_tokens = vec![
944            TokenKind::Let,
945            TokenKind::Identifier("c".to_string()),
946            TokenKind::Equals,
947            TokenKind::Identifier("Color".to_string()),
948            TokenKind::DblColon,
949            TokenKind::Identifier("Red".to_string()),
950            TokenKind::Semicolon,
951            TokenKind::Eof,
952        ];
953
954        for expected in expected_tokens {
955            let kind = lexer.next_token().kind;
956            assert_eq!(kind, expected);
957        }
958    }
959
960    #[test]
961    fn test_lex_match_enum() {
962        let code = r#"
963            match n {
964                Name::Existing(name) => name,
965                Name::NotExisting => "Not existing",
966            }
967        "#;
968
969        let mut lexer = Lexer::new(code);
970        let expected_tokens = vec![
971            TokenKind::Match,
972            TokenKind::Identifier("n".to_string()),
973            TokenKind::LBrace,
974            TokenKind::Identifier("Name".to_string()),
975            TokenKind::DblColon,
976            TokenKind::Identifier("Existing".to_string()),
977            TokenKind::LParen,
978            TokenKind::Identifier("name".to_string()),
979            TokenKind::RParen,
980            TokenKind::FatArrow,
981            TokenKind::Identifier("name".to_string()),
982            TokenKind::Comma,
983            TokenKind::Identifier("Name".to_string()),
984            TokenKind::DblColon,
985            TokenKind::Identifier("NotExisting".to_string()),
986            TokenKind::FatArrow,
987            TokenKind::String("Not existing".to_string()),
988            TokenKind::Comma,
989            TokenKind::RBrace,
990            TokenKind::Eof,
991        ];
992
993        for expected in expected_tokens {
994            let kind = lexer.next_token().kind;
995            assert_eq!(kind, expected);
996        }
997    }
998
999    #[test]
1000    fn test_lex_line_comments() {
1001        let code = r#"
1002            // This is a comment
1003            let x = 10; // Another comment
1004        "#;
1005
1006        let mut lexer = Lexer::new(code);
1007        let expected_tokens = vec![
1008            TokenKind::Let,
1009            TokenKind::Identifier("x".to_string()),
1010            TokenKind::Equals,
1011            TokenKind::Int(10),
1012            TokenKind::Semicolon,
1013            TokenKind::Eof,
1014        ];
1015
1016        for expected in expected_tokens {
1017            let kind = lexer.next_token().kind;
1018            assert_eq!(kind, expected);
1019        }
1020    }
1021
1022    #[test]
1023    fn test_lex_block_comments() {
1024        let code = r#"
1025            /* This is a multi
1026             * line comment */
1027            let x = 10; /* Another comment */
1028        "#;
1029
1030        let mut lexer = Lexer::new(code);
1031        let expected_tokens = vec![
1032            TokenKind::Let,
1033            TokenKind::Identifier("x".to_string()),
1034            TokenKind::Equals,
1035            TokenKind::Int(10),
1036            TokenKind::Semicolon,
1037            TokenKind::Eof,
1038        ];
1039
1040        for expected in expected_tokens {
1041            let kind = lexer.next_token().kind;
1042            assert_eq!(kind, expected);
1043        }
1044    }
1045
1046    #[test]
1047    fn test_lex_array() {
1048        let code = r#"
1049            let arr = [1,2,3,4,5];
1050            print(arr);
1051        "#;
1052
1053        let mut lexer = Lexer::new(code);
1054        let expected_tokens = vec![
1055            TokenKind::Let,
1056            TokenKind::Identifier("arr".to_string()),
1057            TokenKind::Equals,
1058            TokenKind::LSquare,
1059            TokenKind::Int(1),
1060            TokenKind::Comma,
1061            TokenKind::Int(2),
1062            TokenKind::Comma,
1063            TokenKind::Int(3),
1064            TokenKind::Comma,
1065            TokenKind::Int(4),
1066            TokenKind::Comma,
1067            TokenKind::Int(5),
1068            TokenKind::RSquare,
1069            TokenKind::Semicolon,
1070            TokenKind::Identifier("print".to_string()),
1071            TokenKind::LParen,
1072            TokenKind::Identifier("arr".to_string()),
1073            TokenKind::RParen,
1074            TokenKind::Semicolon,
1075            TokenKind::Eof,
1076        ];
1077
1078        for expected in expected_tokens {
1079            let kind = lexer.next_token().kind;
1080            assert_eq!(kind, expected);
1081        }
1082    }
1083
1084    #[test]
1085    fn test_lex_range() {
1086        let code = "let range = 1..10;";
1087        let mut lexer = Lexer::new(code);
1088        let expected_tokens = vec![
1089            TokenKind::Let,
1090            TokenKind::Identifier("range".to_string()),
1091            TokenKind::Equals,
1092            TokenKind::Int(1),
1093            TokenKind::DblDot,
1094            TokenKind::Int(10),
1095            TokenKind::Semicolon,
1096            TokenKind::Eof,
1097        ];
1098
1099        for expected in expected_tokens {
1100            let kind = lexer.next_token().kind;
1101            assert_eq!(kind, expected);
1102        }
1103    }
1104
1105    #[test]
1106    fn test_lex_inclusive_range() {
1107        let code = "let range = 1..=10;";
1108        let mut lexer = Lexer::new(code);
1109        let expected_tokens = vec![
1110            TokenKind::Let,
1111            TokenKind::Identifier("range".to_string()),
1112            TokenKind::Equals,
1113            TokenKind::Int(1),
1114            TokenKind::DblDotEquals,
1115            TokenKind::Int(10),
1116            TokenKind::Semicolon,
1117            TokenKind::Eof,
1118        ];
1119
1120        for expected in expected_tokens {
1121            let kind = lexer.next_token().kind;
1122            assert_eq!(kind, expected);
1123        }
1124    }
1125
1126    #[test]
1127    fn test_lex_from_start_range() {
1128        let code = "let range = ..10;";
1129        let mut lexer = Lexer::new(code);
1130        let expected_tokens = vec![
1131            TokenKind::Let,
1132            TokenKind::Identifier("range".to_string()),
1133            TokenKind::Equals,
1134            TokenKind::DblDot,
1135            TokenKind::Int(10),
1136            TokenKind::Semicolon,
1137            TokenKind::Eof,
1138        ];
1139
1140        for expected in expected_tokens {
1141            let kind = lexer.next_token().kind;
1142            assert_eq!(kind, expected);
1143        }
1144    }
1145
1146    #[test]
1147    fn test_lex_from_start_inclusive_range() {
1148        let code = "let range = ..=10;";
1149        let mut lexer = Lexer::new(code);
1150        let expected_tokens = vec![
1151            TokenKind::Let,
1152            TokenKind::Identifier("range".to_string()),
1153            TokenKind::Equals,
1154            TokenKind::DblDotEquals,
1155            TokenKind::Int(10),
1156            TokenKind::Semicolon,
1157            TokenKind::Eof,
1158        ];
1159
1160        for expected in expected_tokens {
1161            let kind = lexer.next_token().kind;
1162            assert_eq!(kind, expected);
1163        }
1164    }
1165
1166    #[test]
1167    fn test_lex_until_end_range() {
1168        let code = "let range = 1..;";
1169        let mut lexer = Lexer::new(code);
1170        let expected_tokens = vec![
1171            TokenKind::Let,
1172            TokenKind::Identifier("range".to_string()),
1173            TokenKind::Equals,
1174            TokenKind::Int(1),
1175            TokenKind::DblDot,
1176            TokenKind::Semicolon,
1177            TokenKind::Eof,
1178        ];
1179
1180        for expected in expected_tokens {
1181            let kind = lexer.next_token().kind;
1182            assert_eq!(kind, expected);
1183        }
1184    }
1185
1186    #[test]
1187    fn test_lex_until_end_inclusive_range() {
1188        let code = "let range = 1..=;";
1189        let mut lexer = Lexer::new(code);
1190        let expected_tokens = vec![
1191            TokenKind::Let,
1192            TokenKind::Identifier("range".to_string()),
1193            TokenKind::Equals,
1194            TokenKind::Int(1),
1195            TokenKind::DblDotEquals,
1196            TokenKind::Semicolon,
1197            TokenKind::Eof,
1198        ];
1199
1200        for expected in expected_tokens {
1201            let kind = lexer.next_token().kind;
1202            assert_eq!(kind, expected);
1203        }
1204    }
1205
1206    #[test]
1207    fn test_lex_plus_equals() {
1208        let code = "x += 10;";
1209        let mut lexer = Lexer::new(code);
1210        let expected_tokens = vec![
1211            TokenKind::Identifier("x".to_string()),
1212            TokenKind::PlusEquals,
1213            TokenKind::Int(10),
1214            TokenKind::Semicolon,
1215            TokenKind::Eof,
1216        ];
1217
1218        for expected in expected_tokens {
1219            let kind = lexer.next_token().kind;
1220            assert_eq!(kind, expected);
1221        }
1222    }
1223
1224    #[test]
1225    fn test_lex_less_equals() {
1226        let code = "x -= 10;";
1227        let mut lexer = Lexer::new(code);
1228        let expected_tokens = vec![
1229            TokenKind::Identifier("x".to_string()),
1230            TokenKind::MinusEquals,
1231            TokenKind::Int(10),
1232            TokenKind::Semicolon,
1233            TokenKind::Eof,
1234        ];
1235
1236        for expected in expected_tokens {
1237            let kind = lexer.next_token().kind;
1238            assert_eq!(kind, expected);
1239        }
1240    }
1241
1242    #[test]
1243    fn test_lex_times_equals() {
1244        let code = "x *= 10;";
1245        let mut lexer = Lexer::new(code);
1246        let expected_tokens = vec![
1247            TokenKind::Identifier("x".to_string()),
1248            TokenKind::StarEquals,
1249            TokenKind::Int(10),
1250            TokenKind::Semicolon,
1251            TokenKind::Eof,
1252        ];
1253
1254        for expected in expected_tokens {
1255            let kind = lexer.next_token().kind;
1256            assert_eq!(kind, expected);
1257        }
1258    }
1259
1260    #[test]
1261    fn test_lex_divide_equals() {
1262        let code = "x /= 10;";
1263        let mut lexer = Lexer::new(code);
1264        let expected_tokens = vec![
1265            TokenKind::Identifier("x".to_string()),
1266            TokenKind::SlashEquals,
1267            TokenKind::Int(10),
1268            TokenKind::Semicolon,
1269            TokenKind::Eof,
1270        ];
1271
1272        for expected in expected_tokens {
1273            let kind = lexer.next_token().kind;
1274            assert_eq!(kind, expected);
1275        }
1276    }
1277
1278    #[test]
1279    fn test_lex_mod_equals() {
1280        let code = "x %= 10;";
1281        let mut lexer = Lexer::new(code);
1282        let expected_tokens = vec![
1283            TokenKind::Identifier("x".to_string()),
1284            TokenKind::PercentEquals,
1285            TokenKind::Int(10),
1286            TokenKind::Semicolon,
1287            TokenKind::Eof,
1288        ];
1289
1290        for expected in expected_tokens {
1291            let kind = lexer.next_token().kind;
1292            assert_eq!(kind, expected);
1293        }
1294    }
1295
1296    #[test]
1297    fn test_comparison_operators() {
1298        let code = "x > 10; x < 10; x >= 10; x <= 10;";
1299        let mut lexer = Lexer::new(code);
1300        let expected_tokens = vec![
1301            TokenKind::Identifier("x".to_string()),
1302            TokenKind::GreaterThan,
1303            TokenKind::Int(10),
1304            TokenKind::Semicolon,
1305            TokenKind::Identifier("x".to_string()),
1306            TokenKind::LessThan,
1307            TokenKind::Int(10),
1308            TokenKind::Semicolon,
1309            TokenKind::Identifier("x".to_string()),
1310            TokenKind::GreaterThanEquals,
1311            TokenKind::Int(10),
1312            TokenKind::Semicolon,
1313            TokenKind::Identifier("x".to_string()),
1314            TokenKind::LessThanEquals,
1315            TokenKind::Int(10),
1316            TokenKind::Semicolon,
1317            TokenKind::Eof,
1318        ];
1319
1320        for expected in expected_tokens {
1321            let kind = lexer.next_token().kind;
1322            assert_eq!(kind, expected);
1323        }
1324    }
1325
1326    #[test]
1327    fn test_lex_while() {
1328        let code = r#"
1329            while x < 10 {
1330                x += 1;
1331            }
1332        "#;
1333
1334        let mut lexer = Lexer::new(code);
1335        let expected_tokens = vec![
1336            TokenKind::While,
1337            TokenKind::Identifier("x".to_string()),
1338            TokenKind::LessThan,
1339            TokenKind::Int(10),
1340            TokenKind::LBrace,
1341            TokenKind::Identifier("x".to_string()),
1342            TokenKind::PlusEquals,
1343            TokenKind::Int(1),
1344            TokenKind::Semicolon,
1345            TokenKind::RBrace,
1346            TokenKind::Eof,
1347        ];
1348
1349        for expected in expected_tokens {
1350            let kind = lexer.next_token().kind;
1351            assert_eq!(kind, expected);
1352        }
1353    }
1354
1355    #[test]
1356    fn test_lex_loop() {
1357        let code = r#"
1358            loop {
1359                x += 1;
1360            }
1361        "#;
1362
1363        let mut lexer = Lexer::new(code);
1364        let expected_tokens = vec![
1365            TokenKind::Loop,
1366            TokenKind::LBrace,
1367            TokenKind::Identifier("x".to_string()),
1368            TokenKind::PlusEquals,
1369            TokenKind::Int(1),
1370            TokenKind::Semicolon,
1371            TokenKind::RBrace,
1372            TokenKind::Eof,
1373        ];
1374
1375        for expected in expected_tokens {
1376            let kind = lexer.next_token().kind;
1377            assert_eq!(kind, expected);
1378        }
1379    }
1380
1381    #[test]
1382    fn test_lex_impl() {
1383        let input = r#"
1384            impl Point {
1385                fn new(x: int, y: int) -> Point {
1386                    Point { x: x, y: y }
1387                }
1388            }
1389        "#;
1390
1391        let mut lexer = Lexer::new(input.to_string());
1392        let expected_tokens = vec![
1393            TokenKind::Impl,
1394            TokenKind::Identifier("Point".to_string()),
1395            TokenKind::LBrace,
1396            TokenKind::Function,
1397            TokenKind::Identifier("new".to_string()),
1398            TokenKind::LParen,
1399            TokenKind::Identifier("x".to_string()),
1400            TokenKind::Colon,
1401            TokenKind::Type("int".to_string()),
1402            TokenKind::Comma,
1403            TokenKind::Identifier("y".to_string()),
1404            TokenKind::Colon,
1405            TokenKind::Type("int".to_string()),
1406            TokenKind::RParen,
1407            TokenKind::Arrow,
1408            TokenKind::Identifier("Point".to_string()),
1409            TokenKind::LBrace,
1410            TokenKind::Identifier("Point".to_string()),
1411            TokenKind::LBrace,
1412            TokenKind::Identifier("x".to_string()),
1413            TokenKind::Colon,
1414            TokenKind::Identifier("x".to_string()),
1415            TokenKind::Comma,
1416            TokenKind::Identifier("y".to_string()),
1417            TokenKind::Colon,
1418            TokenKind::Identifier("y".to_string()),
1419            TokenKind::RBrace,
1420            TokenKind::RBrace,
1421            TokenKind::RBrace,
1422            TokenKind::Eof,
1423        ];
1424
1425        for expected in expected_tokens {
1426            let token = lexer.next_token();
1427            assert_eq!(token.kind, expected);
1428        }
1429    }
1430
1431    #[test]
1432    fn lex_method_call() {
1433        let code = "Point::new(3, 4)";
1434        let mut lexer = Lexer::new(code.to_string());
1435
1436        let expected_tokens = vec![
1437            TokenKind::Identifier("Point".to_string()),
1438            TokenKind::DblColon,
1439            TokenKind::Identifier("new".to_string()),
1440            TokenKind::LParen,
1441            TokenKind::Int(3),
1442            TokenKind::Comma,
1443            TokenKind::Int(4),
1444            TokenKind::RParen,
1445        ];
1446
1447        for expected in expected_tokens {
1448            let token = lexer.next_token();
1449            assert_eq!(token.kind, expected);
1450        }
1451    }
1452
1453    #[test]
1454    fn test_use_and_pub_keywords() {
1455        let input = r#"
1456            use local::models::User;
1457            pub fn hello() {
1458                println!("Hello");
1459            }
1460        "#;
1461        let mut lexer = Lexer::new(input.to_string());
1462
1463        let expected_tokens = vec![
1464            TokenKind::Use,
1465            TokenKind::Identifier("local".to_string()),
1466            TokenKind::DblColon,
1467            TokenKind::Identifier("models".to_string()),
1468            TokenKind::DblColon,
1469            TokenKind::Identifier("User".to_string()),
1470            TokenKind::Semicolon,
1471            TokenKind::Pub,
1472            TokenKind::Function,
1473            TokenKind::Identifier("hello".to_string()),
1474            TokenKind::LParen,
1475            TokenKind::RParen,
1476            TokenKind::LBrace,
1477            TokenKind::Identifier("println".to_string()),
1478            TokenKind::Bang,
1479            TokenKind::LParen,
1480            TokenKind::String("Hello".to_string()),
1481            TokenKind::RParen,
1482            TokenKind::Semicolon,
1483            TokenKind::RBrace,
1484        ];
1485
1486        for expected in expected_tokens {
1487            let token = lexer.next_token();
1488            assert_eq!(token.kind, expected);
1489        }
1490    }
1491}