use std::fmt;

use crate::span::Span;
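
/// A single lexeme: its kind plus the byte span it covers in the source.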
#[derive(Debug, PartialEq, Clone)]
pub struct Token {
    pub kind: TokenKind,
    pub span: Span,
}

impl Token {
    #[allow(unused)]
    pub fn new(kind: TokenKind, start: usize, end: usize) -> Token {
        Token {
            kind,
            span: Span::new(start, end),
        }
    }
}
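
/// End-of-input sentinel token with an empty span.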
pub const EOF: Token = Token {
    kind: TokenKind::Eof,
    span: Span { start: 0, end: 0 },
};
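
/// Every token the lexer can produce: keywords, literals, operators,
/// punctuation, plus an `Error` variant for unexpected input.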
#[derive(Debug, PartialEq, Clone)]
pub enum TokenKind {
    Function,
    Struct,
    Impl,
    Enum,
    Let,
    Use,
    Pub,
    Mod,
    Extern,
    Async,
    As,
    If,
    Else,
    Match,
    Loop,
    While,
    For,
    In,
    Break,
    Continue,
    Return,
    Identifier(String),
    Int(i64),
    Float(f64),
    String(String),
    Bool(bool),
    Equals,
    DblEquals,
    Semicolon,
    LParen,
    RParen,
    LBrace,
    RBrace,
    LSquare,
    RSquare,
    Comma,
    Arrow,
    FatArrow,
    Type(String),
    Plus,
    Minus,
    Asterisk,
    Slash,
    Percent,
    PlusEquals,
    MinusEquals,
    StarEquals,
    SlashEquals,
    PercentEquals,
    LessThan,
    LessThanEquals,
    GreaterThan,
    GreaterThanEquals,
    BangEquals,
    DblAmpersand,
    DblPipe,
    Pipe,
    Colon,
    DblColon,
    Dot,
    DblDot,
    DblDotEquals,
    Underscore,
    Bang,
    Question,
    Hash,
    Error(String),
    Eof,
}

impl TokenKind {
    pub fn is_identifier(&self) -> bool {
        matches!(self, TokenKind::Identifier(_))
    }
}

impl fmt::Display for TokenKind {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let s = match self {
            TokenKind::Function => "Function",
            TokenKind::Struct => "Struct",
            TokenKind::Impl => "Impl",
            TokenKind::Enum => "Enum",
            TokenKind::Let => "Let",
            TokenKind::Use => "Use",
            TokenKind::Pub => "Pub",
            TokenKind::Mod => "Mod",
            TokenKind::Extern => "Extern",
            TokenKind::Async => "Async",
            TokenKind::As => "As",
            TokenKind::If => "If",
            TokenKind::Else => "Else",
            TokenKind::Match => "Match",
            TokenKind::Loop => "Loop",
            TokenKind::While => "While",
            TokenKind::For => "For",
            TokenKind::In => "In",
            TokenKind::Break => "Break",
            TokenKind::Continue => "Continue",
            TokenKind::Return => "Return",
            TokenKind::Identifier(s) => s,
            TokenKind::Int(i) => return write!(f, "{}", i),
            TokenKind::Float(fl) => return write!(f, "{}", fl),
            TokenKind::String(s) => s,
            TokenKind::Bool(b) => return write!(f, "{}", b),
            TokenKind::Equals => "=",
            TokenKind::DblEquals => "==",
            TokenKind::Semicolon => ";",
            TokenKind::LParen => "(",
            TokenKind::RParen => ")",
            TokenKind::LBrace => "{",
            TokenKind::RBrace => "}",
            TokenKind::LSquare => "[",
            TokenKind::RSquare => "]",
            TokenKind::Comma => ",",
            TokenKind::Arrow => "->",
            TokenKind::FatArrow => "=>",
            TokenKind::Type(s) => s,
            TokenKind::Plus => "+",
            TokenKind::Minus => "-",
            TokenKind::Asterisk => "*",
            TokenKind::Slash => "/",
            TokenKind::Percent => "%",
            TokenKind::PlusEquals => "+=",
            TokenKind::MinusEquals => "-=",
            TokenKind::StarEquals => "*=",
            TokenKind::SlashEquals => "/=",
            TokenKind::PercentEquals => "%=",
            TokenKind::LessThan => "<",
            TokenKind::LessThanEquals => "<=",
            TokenKind::GreaterThan => ">",
            TokenKind::GreaterThanEquals => ">=",
            TokenKind::BangEquals => "!=",
            TokenKind::DblAmpersand => "&&",
            TokenKind::DblPipe => "||",
            TokenKind::Pipe => "|",
            TokenKind::Colon => ":",
            TokenKind::DblColon => "::",
            TokenKind::Dot => ".",
            TokenKind::DblDot => "..",
            TokenKind::DblDotEquals => "..=",
            TokenKind::Underscore => "_",
            TokenKind::Bang => "!",
            TokenKind::Question => "?",
            TokenKind::Hash => "#",
            TokenKind::Error(s) => s,
            TokenKind::Eof => "EOF",
        };
        write!(f, "{}", s)
    }
}
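
/// A hand-written lexer over a source string. All positions are byte
/// offsets into `input`: `ch` is the character at `position`, and
/// `read_position` is the offset just past it.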
#[derive(Debug)]
pub struct Lexer {
    input: String,
    position: usize,
    read_position: usize,
    ch: Option<char>,
    start_position: usize,
}

impl Lexer {
    pub fn new(input: impl Into<String>) -> Lexer {
        let input = input.into();
        let mut l = Lexer {
            input,
            position: 0,
            read_position: 0,
            ch: None,
            start_position: 0,
        };
        l.read_char();
        l
    }
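
    /// Tokenize the entire input; the trailing `Eof` token is not included.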
    pub fn lex_all(&mut self) -> Vec<Token> {
        let mut tokens = Vec::new();
        loop {
            let token = self.next_token();
            if token.kind == TokenKind::Eof {
                break;
            }
            tokens.push(token);
        }
        tokens
    }
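
    /// Advance to the next character. The offset grows by the character's
    /// UTF-8 width, so byte spans stay valid for multi-byte input.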
    fn read_char(&mut self) {
        self.position = self.read_position;

        if self.read_position >= self.input.len() {
            self.ch = None;
            return;
        }

        let mut chars = self.input[self.read_position..].chars();
        if let Some(ch) = chars.next() {
            self.ch = Some(ch);
            self.read_position += ch.len_utf8();
        } else {
            self.ch = None;
        }
    }
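
    /// Look at the next character without consuming anything.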
    fn peek_char(&self) -> Option<char> {
        if self.read_position >= self.input.len() {
            None
        } else {
            self.input[self.read_position..].chars().next()
        }
    }

    fn skip_whitespace(&mut self) {
        while let Some(c) = self.ch {
            if c.is_whitespace() {
                self.read_char();
            } else {
                break;
            }
        }
    }
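
    /// Scan and return the next token, skipping whitespace and comments.
    /// Multi-character operators are resolved with one character of lookahead.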
    pub fn next_token(&mut self) -> Token {
        self.skip_whitespace();
        self.start_position = self.position;
        let token = match self.ch {
            Some('=') => {
                if self.peek_char() == Some('=') {
                    self.read_char();
                    self.create_token(TokenKind::DblEquals)
                } else if self.peek_char() == Some('>') {
                    self.read_char();
                    self.create_token(TokenKind::FatArrow)
                } else {
                    self.create_token(TokenKind::Equals)
                }
            }
            Some(';') => self.create_token(TokenKind::Semicolon),
            Some('(') => self.create_token(TokenKind::LParen),
            Some(')') => self.create_token(TokenKind::RParen),
            Some('{') => self.create_token(TokenKind::LBrace),
            Some('}') => self.create_token(TokenKind::RBrace),
            Some('[') => self.create_token(TokenKind::LSquare),
            Some(']') => self.create_token(TokenKind::RSquare),
            Some('>') => {
                if self.peek_char() == Some('=') {
                    self.read_char();
                    self.create_token(TokenKind::GreaterThanEquals)
                } else {
                    self.create_token(TokenKind::GreaterThan)
                }
            }
            Some('<') => {
                if self.peek_char() == Some('=') {
                    self.read_char();
                    self.create_token(TokenKind::LessThanEquals)
                } else {
                    self.create_token(TokenKind::LessThan)
                }
            }
            Some(',') => self.create_token(TokenKind::Comma),
            Some('+') => {
                if self.peek_char() == Some('=') {
                    self.read_char();
                    self.create_token(TokenKind::PlusEquals)
                } else {
                    self.create_token(TokenKind::Plus)
                }
            }
            Some('-') => {
                if self.peek_char() == Some('=') {
                    self.read_char();
                    self.create_token(TokenKind::MinusEquals)
                } else if self.peek_char() == Some('>') {
                    self.read_char();
                    self.create_token(TokenKind::Arrow)
                } else {
                    self.create_token(TokenKind::Minus)
                }
            }
            Some('*') => {
                if self.peek_char() == Some('=') {
                    self.read_char();
                    self.create_token(TokenKind::StarEquals)
                } else {
                    self.create_token(TokenKind::Asterisk)
                }
            }
            Some(':') => {
                if self.peek_char() == Some(':') {
                    self.read_char();
                    self.create_token(TokenKind::DblColon)
                } else {
                    self.create_token(TokenKind::Colon)
                }
            }
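            // '/' may start a line comment, a block comment, `/=`, or plain
            // division; comments are skipped and the following token returned.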
            Some('/') => {
                if self.peek_char() == Some('/') {
                    while let Some(c) = self.ch {
                        if c == '\n' {
                            break;
                        }
                        self.read_char();
                    }
                    return self.next_token();
                } else if self.peek_char() == Some('*') {
                    while let Some(c) = self.ch {
                        if c == '*' && self.peek_char() == Some('/') {
                            self.read_char();
                            self.read_char();
                            break;
                        }
                        self.read_char();
                    }
                    return self.next_token();
                } else if self.peek_char() == Some('=') {
                    self.read_char();
                    self.create_token(TokenKind::SlashEquals)
                } else {
                    self.create_token(TokenKind::Slash)
                }
            }
            Some('%') => {
                if self.peek_char() == Some('=') {
                    self.read_char();
                    self.create_token(TokenKind::PercentEquals)
                } else {
                    self.create_token(TokenKind::Percent)
                }
            }
            Some('.') => {
                let start = self.position;
                if self.peek_char() == Some('.') {
                    self.read_char();

                    if self.peek_char() == Some('=') {
                        self.read_char();
                        self.read_char();
                        return Token::new(TokenKind::DblDotEquals, start, self.position);
                    }

                    self.read_char();
                    return Token::new(TokenKind::DblDot, start, self.position);
                } else {
                    self.create_token(TokenKind::Dot)
                }
            }
            Some('"') => self.read_string(),
            Some('_') => {
                if self.is_identifier_start(self.peek_char()) {
                    self.read_identifier_or_type()
                } else {
                    self.create_token(TokenKind::Underscore)
                }
            }
            Some('!') => {
                if self.peek_char() == Some('=') {
                    self.read_char();
                    self.create_token(TokenKind::BangEquals)
                } else {
                    self.create_token(TokenKind::Bang)
                }
            }
            Some('&') => {
                if self.peek_char() == Some('&') {
                    self.read_char();
                    self.create_token(TokenKind::DblAmpersand)
                } else {
                    self.create_token(TokenKind::Error("Unexpected character: &".to_string()))
                }
            }
            Some('|') => {
                if self.peek_char() == Some('|') {
                    self.read_char();
                    self.create_token(TokenKind::DblPipe)
                } else {
                    self.create_token(TokenKind::Pipe)
                }
            }
            Some('?') => self.create_token(TokenKind::Question),
            Some('#') => self.create_token(TokenKind::Hash),
            Some(c) => {
                if c.is_alphabetic() {
                    return self.read_identifier_or_type();
                } else if c.is_ascii_digit() {
                    return self.read_number();
                } else {
                    self.create_token(TokenKind::Error(format!("Unexpected character: {}", c)))
                }
            }
            None => self.create_token_no_advance(TokenKind::Eof),
        };
        self.read_char();
        token
    }

    fn is_identifier_start(&self, c: Option<char>) -> bool {
        c.is_some_and(|c| c.is_alphabetic() || c == '_')
    }

    fn is_identifier_char(&self, c: char) -> bool {
        c.is_alphanumeric() || c == '_'
    }
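
    /// Build a token from `start_position` through the current character;
    /// `next_token` advances past that character afterwards.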
    fn create_token(&self, kind: TokenKind) -> Token {
        Token {
            kind,
            span: Span::new(self.start_position, self.read_position),
        }
    }
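
    /// Build a token that ends just before the current character, for paths
    /// that have already consumed everything belonging to the token.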
    fn create_token_no_advance(&self, kind: TokenKind) -> Token {
        Token {
            kind,
            span: Span::new(self.start_position, self.position),
        }
    }
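
    /// Read an identifier, then classify it as a keyword, a built-in type
    /// name, or a plain identifier.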
    fn read_identifier_or_type(&mut self) -> Token {
        let start_position = self.position;
        while let Some(c) = self.ch {
            if self.is_identifier_char(c) {
                self.read_char();
            } else {
                break;
            }
        }
        let mut identifier: String = self.input[start_position..self.position].to_string();
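
        // Special case: `format!` keeps its bang and lexes as a single
        // identifier token.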
        if identifier == "format" && self.ch == Some('!') {
            self.read_char();
            identifier.push('!');
        }

        let kind = match identifier.as_str() {
            "struct" => TokenKind::Struct,
            "impl" => TokenKind::Impl,
            "enum" => TokenKind::Enum,
            "false" => TokenKind::Bool(false),
            "true" => TokenKind::Bool(true),
            "fn" => TokenKind::Function,
            "let" => TokenKind::Let,
            "use" => TokenKind::Use,
            "pub" => TokenKind::Pub,
            "mod" => TokenKind::Mod,
            "extern" => TokenKind::Extern,
            "async" => TokenKind::Async,
            "as" => TokenKind::As,
            "if" => TokenKind::If,
            "else" => TokenKind::Else,
            "match" => TokenKind::Match,
            "loop" => TokenKind::Loop,
            "while" => TokenKind::While,
            "for" => TokenKind::For,
            "in" => TokenKind::In,
            "break" => TokenKind::Break,
            "continue" => TokenKind::Continue,
            "return" => TokenKind::Return,
            "int" | "float" | "bool" | "string" => TokenKind::Type(identifier),
            "_" => TokenKind::Underscore,
            _ => TokenKind::Identifier(identifier),
        };

        self.create_token_no_advance(kind)
    }
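
    /// Read a string literal (the current character is the opening quote).
    /// Common escapes are decoded; unknown escapes are kept verbatim.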
    fn read_string(&mut self) -> Token {
        self.read_char(); // consume the opening quote
        let mut string = String::new();

        while let Some(c) = self.ch {
            if c == '"' {
                break;
            } else if c == '\\' {
                self.read_char(); // consume the backslash
                match self.ch {
                    Some('n') => string.push('\n'),
                    Some('t') => string.push('\t'),
                    Some('r') => string.push('\r'),
                    Some('\\') => string.push('\\'),
                    Some('"') => string.push('"'),
                    Some('\'') => string.push('\''),
                    Some('0') => string.push('\0'),
                    Some(ch) => {
                        string.push('\\');
                        string.push(ch);
                    }
                    None => {
                        string.push('\\');
                        break;
                    }
                }
            } else {
                string.push(c);
            }
            self.read_char();
        }

        self.create_token(TokenKind::String(string))
    }
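
    /// Read an integer or float literal. A `.` followed by a second `.` is
    /// left alone so the range operators (`..`, `..=`) lex correctly.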
    fn read_number(&mut self) -> Token {
        let start = self.position;
        while let Some(c) = self.ch {
            if c == '.' && self.peek_char() == Some('.') {
                break;
            }
            if !c.is_ascii_digit() && c != '.' {
                break;
            }
            self.read_char();
        }

        let literal = self.input[start..self.position].to_string();

        // A malformed literal such as `1.2.3` yields an Error token instead
        // of panicking on `unwrap`.
        if literal.contains('.') {
            return match literal.parse() {
                Ok(num) => self.create_token_no_advance(TokenKind::Float(num)),
                Err(_) => self.create_token_no_advance(TokenKind::Error(format!(
                    "Invalid number literal: {}",
                    literal
                ))),
            };
        }

        match literal.parse() {
            Ok(num) => self.create_token_no_advance(TokenKind::Int(num)),
            Err(_) => self.create_token_no_advance(TokenKind::Error(format!(
                "Invalid number literal: {}",
                literal
            ))),
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_float() {
        let input = "let x = 10.5;";
        let mut lexer = Lexer::new(input);
        let expected_tokens = vec![
            TokenKind::Let,
            TokenKind::Identifier("x".to_string()),
            TokenKind::Equals,
            TokenKind::Float(10.5),
            TokenKind::Semicolon,
        ];

        for expected in expected_tokens {
            let token = lexer.next_token();
            assert_eq!(token.kind, expected);
        }
    }

    #[test]
    fn test_next_token() {
        let input = r#"
            let five = 5;
            fn add(x, y) {
                x + y;
            }
        "#;
        let mut lexer = Lexer::new(input);

        let expected_tokens = vec![
            TokenKind::Let,
            TokenKind::Identifier("five".to_string()),
            TokenKind::Equals,
            TokenKind::Int(5),
            TokenKind::Semicolon,
            TokenKind::Function,
            TokenKind::Identifier("add".to_string()),
            TokenKind::LParen,
            TokenKind::Identifier("x".to_string()),
            TokenKind::Comma,
            TokenKind::Identifier("y".to_string()),
            TokenKind::RParen,
            TokenKind::LBrace,
            TokenKind::Identifier("x".to_string()),
            TokenKind::Plus,
            TokenKind::Identifier("y".to_string()),
            TokenKind::Semicolon,
            TokenKind::RBrace,
            TokenKind::Eof,
        ];

        for expected in expected_tokens {
            let token = lexer.next_token();
            assert_eq!(token.kind, expected);
        }
    }

    #[test]
    fn test_lex_let_int() {
        let input = r#"let age = 1;"#;
        let mut lexer = Lexer::new(input);
        let expected_tokens = vec![
            Token::new(TokenKind::Let, 0, 3),
            Token::new(TokenKind::Identifier("age".to_string()), 4, 7),
            Token::new(TokenKind::Equals, 8, 9),
            Token::new(TokenKind::Int(1), 10, 11),
            Token::new(TokenKind::Semicolon, 11, 12),
            Token::new(TokenKind::Eof, 12, 12),
        ];

        for expected in expected_tokens {
            let token = lexer.next_token();
            assert_eq!(token, expected);
        }
    }

    #[test]
    fn test_lex_let_string() {
        let input = r#"let name = "Felipe";"#;
        let mut lexer = Lexer::new(input);
        let expected_tokens = vec![
            Token::new(TokenKind::Let, 0, 3),
            Token::new(TokenKind::Identifier("name".to_string()), 4, 8),
            Token::new(TokenKind::Equals, 9, 10),
            Token::new(TokenKind::String("Felipe".to_string()), 11, 19),
            Token::new(TokenKind::Semicolon, 19, 20),
            Token::new(TokenKind::Eof, 20, 20),
        ];

        for expected in expected_tokens {
            let token = lexer.next_token();
            assert_eq!(token, expected);
        }
    }

    #[test]
    fn test_lex_let_bool() {
        let input = r#"let is_true = true;"#;
        let mut lexer = Lexer::new(input);
        let expected_tokens = vec![
            Token::new(TokenKind::Let, 0, 3),
            Token::new(TokenKind::Identifier("is_true".to_string()), 4, 11),
            Token::new(TokenKind::Equals, 12, 13),
            Token::new(TokenKind::Bool(true), 14, 18),
            Token::new(TokenKind::Semicolon, 18, 19),
            Token::new(TokenKind::Eof, 19, 19),
        ];

        for expected in expected_tokens {
            let token = lexer.next_token();
            assert_eq!(token, expected);
        }
    }

    #[test]
    fn test_lex_let_string_newline() {
        let input = r#"
            let name = "Felipe";
        "#;
        let mut lexer = Lexer::new(input);
        let expected_tokens = vec![
            Token::new(TokenKind::Let, 13, 16),
            Token::new(TokenKind::Identifier("name".to_string()), 17, 21),
            Token::new(TokenKind::Equals, 22, 23),
            Token::new(TokenKind::String("Felipe".to_string()), 24, 32),
            Token::new(TokenKind::Semicolon, 32, 33),
            Token::new(TokenKind::Eof, 42, 42),
        ];

        for expected in expected_tokens {
            let token = lexer.next_token();
            assert_eq!(token, expected);
        }
    }

    #[test]
    fn test_lex_if() {
        let input = r#"
            if true {
                let x = 10;
            }
        "#;

        let mut lexer = Lexer::new(input);
        let expected_tokens = vec![
            Token::new(TokenKind::If, 13, 15),
            Token::new(TokenKind::Bool(true), 16, 20),
            Token::new(TokenKind::LBrace, 21, 22),
            Token::new(TokenKind::Let, 39, 42),
            Token::new(TokenKind::Identifier("x".to_string()), 43, 44),
            Token::new(TokenKind::Equals, 45, 46),
            Token::new(TokenKind::Int(10), 47, 49),
            Token::new(TokenKind::Semicolon, 49, 50),
            Token::new(TokenKind::RBrace, 63, 64),
            Token::new(TokenKind::Eof, 73, 73),
        ];

        for expected in expected_tokens {
            let token = lexer.next_token();
            assert_eq!(token, expected);
        }
    }

    #[test]
    fn test_lex_if_else() {
        let input = r#"
            if true {
                let x = 10;
            } else {
                let x = 20;
            }
        "#;

        let mut lexer = Lexer::new(input);
        let expected_tokens = vec![
            Token::new(TokenKind::If, 13, 15),
            Token::new(TokenKind::Bool(true), 16, 20),
            Token::new(TokenKind::LBrace, 21, 22),
            Token::new(TokenKind::Let, 39, 42),
            Token::new(TokenKind::Identifier("x".to_string()), 43, 44),
            Token::new(TokenKind::Equals, 45, 46),
            Token::new(TokenKind::Int(10), 47, 49),
            Token::new(TokenKind::Semicolon, 49, 50),
            Token::new(TokenKind::RBrace, 63, 64),
            Token::new(TokenKind::Else, 65, 69),
            Token::new(TokenKind::LBrace, 70, 71),
            Token::new(TokenKind::Let, 88, 91),
            Token::new(TokenKind::Identifier("x".to_string()), 92, 93),
            Token::new(TokenKind::Equals, 94, 95),
            Token::new(TokenKind::Int(20), 96, 98),
            Token::new(TokenKind::Semicolon, 98, 99),
            Token::new(TokenKind::RBrace, 112, 113),
            Token::new(TokenKind::Eof, 122, 122),
        ];

        for expected in expected_tokens {
            let token = lexer.next_token();
            assert_eq!(token, expected);
        }
    }

    #[test]
    fn test_lex_struct() {
        let input = r#"
            struct Person {
                name: string,
                age: int,
            }
        "#;

        let mut lexer = Lexer::new(input);
        let expected_tokens = vec![
            Token::new(TokenKind::Struct, 13, 19),
            Token::new(TokenKind::Identifier("Person".to_string()), 20, 26),
            Token::new(TokenKind::LBrace, 27, 28),
            Token::new(TokenKind::Identifier("name".to_string()), 45, 49),
            Token::new(TokenKind::Colon, 49, 50),
            Token::new(TokenKind::Type("string".to_string()), 51, 57),
            Token::new(TokenKind::Comma, 57, 58),
            Token::new(TokenKind::Identifier("age".to_string()), 75, 78),
            Token::new(TokenKind::Colon, 78, 79),
            Token::new(TokenKind::Type("int".to_string()), 80, 83),
            Token::new(TokenKind::Comma, 83, 84),
            Token::new(TokenKind::RBrace, 97, 98),
            Token::new(TokenKind::Eof, 107, 107),
        ];

        for expected in expected_tokens {
            let token = lexer.next_token();
            assert_eq!(token, expected);
        }
    }

    #[test]
    fn test_lex_struct_instance() {
        let input = r#"
            struct Person {
                name: string,
                age: int,
            }

            let p = Person {
                name: "Felipe",
                age: 30,
            };
        "#;

        let mut lexer = Lexer::new(input);
        let expected_tokens = vec![
            Token::new(TokenKind::Struct, 13, 19),
            Token::new(TokenKind::Identifier("Person".to_string()), 20, 26),
            Token::new(TokenKind::LBrace, 27, 28),
            Token::new(TokenKind::Identifier("name".to_string()), 45, 49),
            Token::new(TokenKind::Colon, 49, 50),
            Token::new(TokenKind::Type("string".to_string()), 51, 57),
            Token::new(TokenKind::Comma, 57, 58),
            Token::new(TokenKind::Identifier("age".to_string()), 75, 78),
            Token::new(TokenKind::Colon, 78, 79),
            Token::new(TokenKind::Type("int".to_string()), 80, 83),
            Token::new(TokenKind::Comma, 83, 84),
            Token::new(TokenKind::RBrace, 97, 98),
            Token::new(TokenKind::Let, 112, 115),
            Token::new(TokenKind::Identifier("p".to_string()), 116, 117),
            Token::new(TokenKind::Equals, 118, 119),
            Token::new(TokenKind::Identifier("Person".to_string()), 120, 126),
            Token::new(TokenKind::LBrace, 127, 128),
            Token::new(TokenKind::Identifier("name".to_string()), 145, 149),
            Token::new(TokenKind::Colon, 149, 150),
            Token::new(TokenKind::String("Felipe".to_string()), 151, 159),
            Token::new(TokenKind::Comma, 159, 160),
            Token::new(TokenKind::Identifier("age".to_string()), 177, 180),
            Token::new(TokenKind::Colon, 180, 181),
            Token::new(TokenKind::Int(30), 182, 184),
            Token::new(TokenKind::Comma, 184, 185),
            Token::new(TokenKind::RBrace, 198, 199),
            Token::new(TokenKind::Semicolon, 199, 200),
            Token::new(TokenKind::Eof, 209, 209),
        ];

        for expected in expected_tokens {
            let token = lexer.next_token();
            assert_eq!(token, expected);
        }
    }

    #[test]
    fn test_lex_struct_field_access() {
        let input = "p.name";

        let mut lexer = Lexer::new(input);
        let expected_tokens = vec![
            Token::new(TokenKind::Identifier("p".to_string()), 0, 1),
            Token::new(TokenKind::Dot, 1, 2),
            Token::new(TokenKind::Identifier("name".to_string()), 2, 6),
            Token::new(TokenKind::Eof, 6, 6),
        ];

        for expected in expected_tokens {
            let token = lexer.next_token();
            assert_eq!(token, expected);
        }
    }

    #[test]
    fn test_lex_struct_field_set() {
        let input = "client.age = 12;";

        let mut lexer = Lexer::new(input);
        let expected_tokens = vec![
            Token::new(TokenKind::Identifier("client".to_string()), 0, 6),
            Token::new(TokenKind::Dot, 6, 7),
            Token::new(TokenKind::Identifier("age".to_string()), 7, 10),
            Token::new(TokenKind::Equals, 11, 12),
            Token::new(TokenKind::Int(12), 13, 15),
            Token::new(TokenKind::Semicolon, 15, 16),
            Token::new(TokenKind::Eof, 16, 16),
        ];

        for expected in expected_tokens {
            let token = lexer.next_token();
            assert_eq!(token, expected);
        }
    }

    #[test]
    fn test_lex_eq() {
        let input = "x == 10;";

        let mut lexer = Lexer::new(input);
        let expected_tokens = vec![
            Token::new(TokenKind::Identifier("x".to_string()), 0, 1),
            Token::new(TokenKind::DblEquals, 2, 4),
            Token::new(TokenKind::Int(10), 5, 7),
            Token::new(TokenKind::Semicolon, 7, 8),
            Token::new(TokenKind::Eof, 8, 8),
        ];

        for expected in expected_tokens {
            let token = lexer.next_token();
            assert_eq!(token, expected);
        }
    }

    #[test]
    fn test_lex_enum() {
        let input = r#"
            enum Option {
                Some(String),
                None,
            }
        "#;

        let mut lexer = Lexer::new(input);
        let expected_tokens = vec![
            TokenKind::Enum,
            TokenKind::Identifier("Option".to_string()),
            TokenKind::LBrace,
            TokenKind::Identifier("Some".to_string()),
            TokenKind::LParen,
            TokenKind::Identifier("String".to_string()),
            TokenKind::RParen,
            TokenKind::Comma,
            TokenKind::Identifier("None".to_string()),
            TokenKind::Comma,
            TokenKind::RBrace,
            TokenKind::Eof,
        ];

        for expected in expected_tokens {
            let kind = lexer.next_token().kind;
            assert_eq!(kind, expected);
        }
    }

    #[test]
    fn test_lex_enum_variant() {
        let input = r"let c = Color::Red;";

        let mut lexer = Lexer::new(input);
        let expected_tokens = vec![
            TokenKind::Let,
            TokenKind::Identifier("c".to_string()),
            TokenKind::Equals,
            TokenKind::Identifier("Color".to_string()),
            TokenKind::DblColon,
            TokenKind::Identifier("Red".to_string()),
            TokenKind::Semicolon,
            TokenKind::Eof,
        ];

        for expected in expected_tokens {
            let kind = lexer.next_token().kind;
            assert_eq!(kind, expected);
        }
    }

    #[test]
    fn test_lex_match_enum() {
        let code = r#"
            match n {
                Name::Existing(name) => name,
                Name::NotExisting => "Not existing",
            }
        "#;

        let mut lexer = Lexer::new(code);
        let expected_tokens = vec![
            TokenKind::Match,
            TokenKind::Identifier("n".to_string()),
            TokenKind::LBrace,
            TokenKind::Identifier("Name".to_string()),
            TokenKind::DblColon,
            TokenKind::Identifier("Existing".to_string()),
            TokenKind::LParen,
            TokenKind::Identifier("name".to_string()),
            TokenKind::RParen,
            TokenKind::FatArrow,
            TokenKind::Identifier("name".to_string()),
            TokenKind::Comma,
            TokenKind::Identifier("Name".to_string()),
            TokenKind::DblColon,
            TokenKind::Identifier("NotExisting".to_string()),
            TokenKind::FatArrow,
            TokenKind::String("Not existing".to_string()),
            TokenKind::Comma,
            TokenKind::RBrace,
            TokenKind::Eof,
        ];

        for expected in expected_tokens {
            let kind = lexer.next_token().kind;
            assert_eq!(kind, expected);
        }
    }

    #[test]
    fn test_lex_line_comments() {
        let code = r#"
            // This is a comment
            let x = 10; // Another comment
        "#;

        let mut lexer = Lexer::new(code);
        let expected_tokens = vec![
            TokenKind::Let,
            TokenKind::Identifier("x".to_string()),
            TokenKind::Equals,
            TokenKind::Int(10),
            TokenKind::Semicolon,
            TokenKind::Eof,
        ];

        for expected in expected_tokens {
            let kind = lexer.next_token().kind;
            assert_eq!(kind, expected);
        }
    }

    #[test]
    fn test_lex_block_comments() {
        let code = r#"
            /* This is a multi
             * line comment */
            let x = 10; /* Another comment */
        "#;

        let mut lexer = Lexer::new(code);
        let expected_tokens = vec![
            TokenKind::Let,
            TokenKind::Identifier("x".to_string()),
            TokenKind::Equals,
            TokenKind::Int(10),
            TokenKind::Semicolon,
            TokenKind::Eof,
        ];

        for expected in expected_tokens {
            let kind = lexer.next_token().kind;
            assert_eq!(kind, expected);
        }
    }

    #[test]
    fn test_lex_array() {
        let code = r#"
            let arr = [1,2,3,4,5];
            print(arr);
        "#;

        let mut lexer = Lexer::new(code);
        let expected_tokens = vec![
            TokenKind::Let,
            TokenKind::Identifier("arr".to_string()),
            TokenKind::Equals,
            TokenKind::LSquare,
            TokenKind::Int(1),
            TokenKind::Comma,
            TokenKind::Int(2),
            TokenKind::Comma,
            TokenKind::Int(3),
            TokenKind::Comma,
            TokenKind::Int(4),
            TokenKind::Comma,
            TokenKind::Int(5),
            TokenKind::RSquare,
            TokenKind::Semicolon,
            TokenKind::Identifier("print".to_string()),
            TokenKind::LParen,
            TokenKind::Identifier("arr".to_string()),
            TokenKind::RParen,
            TokenKind::Semicolon,
            TokenKind::Eof,
        ];

        for expected in expected_tokens {
            let kind = lexer.next_token().kind;
            assert_eq!(kind, expected);
        }
    }

    #[test]
    fn test_lex_range() {
        let code = "let range = 1..10;";
        let mut lexer = Lexer::new(code);
        let expected_tokens = vec![
            TokenKind::Let,
            TokenKind::Identifier("range".to_string()),
            TokenKind::Equals,
            TokenKind::Int(1),
            TokenKind::DblDot,
            TokenKind::Int(10),
            TokenKind::Semicolon,
            TokenKind::Eof,
        ];

        for expected in expected_tokens {
            let kind = lexer.next_token().kind;
            assert_eq!(kind, expected);
        }
    }

    #[test]
    fn test_lex_inclusive_range() {
        let code = "let range = 1..=10;";
        let mut lexer = Lexer::new(code);
        let expected_tokens = vec![
            TokenKind::Let,
            TokenKind::Identifier("range".to_string()),
            TokenKind::Equals,
            TokenKind::Int(1),
            TokenKind::DblDotEquals,
            TokenKind::Int(10),
            TokenKind::Semicolon,
            TokenKind::Eof,
        ];

        for expected in expected_tokens {
            let kind = lexer.next_token().kind;
            assert_eq!(kind, expected);
        }
    }

    #[test]
    fn test_lex_from_start_range() {
        let code = "let range = ..10;";
        let mut lexer = Lexer::new(code);
        let expected_tokens = vec![
            TokenKind::Let,
            TokenKind::Identifier("range".to_string()),
            TokenKind::Equals,
            TokenKind::DblDot,
            TokenKind::Int(10),
            TokenKind::Semicolon,
            TokenKind::Eof,
        ];

        for expected in expected_tokens {
            let kind = lexer.next_token().kind;
            assert_eq!(kind, expected);
        }
    }

    #[test]
    fn test_lex_from_start_inclusive_range() {
        let code = "let range = ..=10;";
        let mut lexer = Lexer::new(code);
        let expected_tokens = vec![
            TokenKind::Let,
            TokenKind::Identifier("range".to_string()),
            TokenKind::Equals,
            TokenKind::DblDotEquals,
            TokenKind::Int(10),
            TokenKind::Semicolon,
            TokenKind::Eof,
        ];

        for expected in expected_tokens {
            let kind = lexer.next_token().kind;
            assert_eq!(kind, expected);
        }
    }

    #[test]
    fn test_lex_until_end_range() {
        let code = "let range = 1..;";
        let mut lexer = Lexer::new(code);
        let expected_tokens = vec![
            TokenKind::Let,
            TokenKind::Identifier("range".to_string()),
            TokenKind::Equals,
            TokenKind::Int(1),
            TokenKind::DblDot,
            TokenKind::Semicolon,
            TokenKind::Eof,
        ];

        for expected in expected_tokens {
            let kind = lexer.next_token().kind;
            assert_eq!(kind, expected);
        }
    }

    #[test]
    fn test_lex_until_end_inclusive_range() {
        let code = "let range = 1..=;";
        let mut lexer = Lexer::new(code);
        let expected_tokens = vec![
            TokenKind::Let,
            TokenKind::Identifier("range".to_string()),
            TokenKind::Equals,
            TokenKind::Int(1),
            TokenKind::DblDotEquals,
            TokenKind::Semicolon,
            TokenKind::Eof,
        ];

        for expected in expected_tokens {
            let kind = lexer.next_token().kind;
            assert_eq!(kind, expected);
        }
    }

    #[test]
    fn test_lex_plus_equals() {
        let code = "x += 10;";
        let mut lexer = Lexer::new(code);
        let expected_tokens = vec![
            TokenKind::Identifier("x".to_string()),
            TokenKind::PlusEquals,
            TokenKind::Int(10),
            TokenKind::Semicolon,
            TokenKind::Eof,
        ];

        for expected in expected_tokens {
            let kind = lexer.next_token().kind;
            assert_eq!(kind, expected);
        }
    }

    #[test]
    fn test_lex_minus_equals() {
        let code = "x -= 10;";
        let mut lexer = Lexer::new(code);
        let expected_tokens = vec![
            TokenKind::Identifier("x".to_string()),
            TokenKind::MinusEquals,
            TokenKind::Int(10),
            TokenKind::Semicolon,
            TokenKind::Eof,
        ];

        for expected in expected_tokens {
            let kind = lexer.next_token().kind;
            assert_eq!(kind, expected);
        }
    }

    #[test]
    fn test_lex_times_equals() {
        let code = "x *= 10;";
        let mut lexer = Lexer::new(code);
        let expected_tokens = vec![
            TokenKind::Identifier("x".to_string()),
            TokenKind::StarEquals,
            TokenKind::Int(10),
            TokenKind::Semicolon,
            TokenKind::Eof,
        ];

        for expected in expected_tokens {
            let kind = lexer.next_token().kind;
            assert_eq!(kind, expected);
        }
    }

    #[test]
    fn test_lex_divide_equals() {
        let code = "x /= 10;";
        let mut lexer = Lexer::new(code);
        let expected_tokens = vec![
            TokenKind::Identifier("x".to_string()),
            TokenKind::SlashEquals,
            TokenKind::Int(10),
            TokenKind::Semicolon,
            TokenKind::Eof,
        ];

        for expected in expected_tokens {
            let kind = lexer.next_token().kind;
            assert_eq!(kind, expected);
        }
    }

    #[test]
    fn test_lex_mod_equals() {
        let code = "x %= 10;";
        let mut lexer = Lexer::new(code);
        let expected_tokens = vec![
            TokenKind::Identifier("x".to_string()),
            TokenKind::PercentEquals,
            TokenKind::Int(10),
            TokenKind::Semicolon,
            TokenKind::Eof,
        ];

        for expected in expected_tokens {
            let kind = lexer.next_token().kind;
            assert_eq!(kind, expected);
        }
    }

    #[test]
    fn test_comparison_operators() {
        let code = "x > 10; x < 10; x >= 10; x <= 10;";
        let mut lexer = Lexer::new(code);
        let expected_tokens = vec![
            TokenKind::Identifier("x".to_string()),
            TokenKind::GreaterThan,
            TokenKind::Int(10),
            TokenKind::Semicolon,
            TokenKind::Identifier("x".to_string()),
            TokenKind::LessThan,
            TokenKind::Int(10),
            TokenKind::Semicolon,
            TokenKind::Identifier("x".to_string()),
            TokenKind::GreaterThanEquals,
            TokenKind::Int(10),
            TokenKind::Semicolon,
            TokenKind::Identifier("x".to_string()),
            TokenKind::LessThanEquals,
            TokenKind::Int(10),
            TokenKind::Semicolon,
            TokenKind::Eof,
        ];

        for expected in expected_tokens {
            let kind = lexer.next_token().kind;
            assert_eq!(kind, expected);
        }
    }

    #[test]
    fn test_lex_while() {
        let code = r#"
            while x < 10 {
                x += 1;
            }
        "#;

        let mut lexer = Lexer::new(code);
        let expected_tokens = vec![
            TokenKind::While,
            TokenKind::Identifier("x".to_string()),
            TokenKind::LessThan,
            TokenKind::Int(10),
            TokenKind::LBrace,
            TokenKind::Identifier("x".to_string()),
            TokenKind::PlusEquals,
            TokenKind::Int(1),
            TokenKind::Semicolon,
            TokenKind::RBrace,
            TokenKind::Eof,
        ];

        for expected in expected_tokens {
            let kind = lexer.next_token().kind;
            assert_eq!(kind, expected);
        }
    }

    #[test]
    fn test_lex_loop() {
        let code = r#"
            loop {
                x += 1;
            }
        "#;

        let mut lexer = Lexer::new(code);
        let expected_tokens = vec![
            TokenKind::Loop,
            TokenKind::LBrace,
            TokenKind::Identifier("x".to_string()),
            TokenKind::PlusEquals,
            TokenKind::Int(1),
            TokenKind::Semicolon,
            TokenKind::RBrace,
            TokenKind::Eof,
        ];

        for expected in expected_tokens {
            let kind = lexer.next_token().kind;
            assert_eq!(kind, expected);
        }
    }

    #[test]
    fn test_lex_impl() {
        let input = r#"
            impl Point {
                fn new(x: int, y: int) -> Point {
                    Point { x: x, y: y }
                }
            }
        "#;

        let mut lexer = Lexer::new(input);
        let expected_tokens = vec![
            TokenKind::Impl,
            TokenKind::Identifier("Point".to_string()),
            TokenKind::LBrace,
            TokenKind::Function,
            TokenKind::Identifier("new".to_string()),
            TokenKind::LParen,
            TokenKind::Identifier("x".to_string()),
            TokenKind::Colon,
            TokenKind::Type("int".to_string()),
            TokenKind::Comma,
            TokenKind::Identifier("y".to_string()),
            TokenKind::Colon,
            TokenKind::Type("int".to_string()),
            TokenKind::RParen,
            TokenKind::Arrow,
            TokenKind::Identifier("Point".to_string()),
            TokenKind::LBrace,
            TokenKind::Identifier("Point".to_string()),
            TokenKind::LBrace,
            TokenKind::Identifier("x".to_string()),
            TokenKind::Colon,
            TokenKind::Identifier("x".to_string()),
            TokenKind::Comma,
            TokenKind::Identifier("y".to_string()),
            TokenKind::Colon,
            TokenKind::Identifier("y".to_string()),
            TokenKind::RBrace,
            TokenKind::RBrace,
            TokenKind::RBrace,
            TokenKind::Eof,
        ];

        for expected in expected_tokens {
            let token = lexer.next_token();
            assert_eq!(token.kind, expected);
        }
    }

    #[test]
    fn lex_method_call() {
        let code = "Point::new(3, 4)";
        let mut lexer = Lexer::new(code);

        let expected_tokens = vec![
            TokenKind::Identifier("Point".to_string()),
            TokenKind::DblColon,
            TokenKind::Identifier("new".to_string()),
            TokenKind::LParen,
            TokenKind::Int(3),
            TokenKind::Comma,
            TokenKind::Int(4),
            TokenKind::RParen,
        ];

        for expected in expected_tokens {
            let token = lexer.next_token();
            assert_eq!(token.kind, expected);
        }
    }

    #[test]
    fn test_use_and_pub_keywords() {
        let input = r#"
            use local::models::User;
            pub fn hello() {
                println!("Hello");
            }
        "#;
        let mut lexer = Lexer::new(input);

        let expected_tokens = vec![
            TokenKind::Use,
            TokenKind::Identifier("local".to_string()),
            TokenKind::DblColon,
            TokenKind::Identifier("models".to_string()),
            TokenKind::DblColon,
            TokenKind::Identifier("User".to_string()),
            TokenKind::Semicolon,
            TokenKind::Pub,
            TokenKind::Function,
            TokenKind::Identifier("hello".to_string()),
            TokenKind::LParen,
            TokenKind::RParen,
            TokenKind::LBrace,
            TokenKind::Identifier("println".to_string()),
            TokenKind::Bang,
            TokenKind::LParen,
            TokenKind::String("Hello".to_string()),
            TokenKind::RParen,
            TokenKind::Semicolon,
            TokenKind::RBrace,
        ];

        for expected in expected_tokens {
            let token = lexer.next_token();
            assert_eq!(token.kind, expected);
        }
    }
}