1use crate::error::{LexError, Position};
7
8#[derive(Debug, Clone, PartialEq)]
10pub struct Token {
11 pub kind: TokenKind,
13 pub position: Position,
15}
16
17impl Token {
18 pub fn new(kind: TokenKind, position: Position) -> Self {
20 Self { kind, position }
21 }
22}
23
/// Every category of token the lexer can produce.
#[derive(Debug, Clone, PartialEq)]
pub enum TokenKind {
    // ----- Literals -----
    /// Integer literal, parsed as `i64`.
    Integer(i64),
    /// Floating-point literal, parsed as `f64`.
    Float(f64),
    /// String literal with escape sequences already decoded.
    String(String),

    // ----- Identifiers and keywords -----
    /// Any word that is not one of the keywords below.
    Identifier(String),
    /// `let`
    Let,
    /// `if`
    If,
    /// `else`
    Else,
    /// `for`
    For,
    /// `fn`
    Fn,
    /// `return`
    Return,
    /// `true`
    True,
    /// `false`
    False,
    /// `null`
    Null,

    // ----- Operators -----
    /// `+`
    Plus,
    /// `-`
    Minus,
    /// `*`
    Star,
    /// `/`
    Slash,
    /// `%`
    Percent,
    /// `=`
    Assign,
    /// `==`
    Equal,
    /// `!=`
    NotEqual,
    /// `<`
    LessThan,
    /// `<=`
    LessEqual,
    /// `>`
    GreaterThan,
    /// `>=`
    GreaterEqual,
    /// `!`
    Bang,
    /// `&&`
    And,
    /// `||`
    Or,

    // ----- Delimiters and punctuation -----
    /// `(`
    LeftParen,
    /// `)`
    RightParen,
    /// `{`
    LeftBrace,
    /// `}`
    RightBrace,
    /// `[`
    LeftBracket,
    /// `]`
    RightBracket,
    /// `,`
    Comma,
    /// `;`
    Semicolon,
    /// `:`
    Colon,
    /// `.`
    Dot,

    /// End of input.
    Eof,
}
115
116impl std::fmt::Display for TokenKind {
117 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
118 match self {
119 TokenKind::Integer(n) => write!(f, "{}", n),
120 TokenKind::Float(fl) => write!(f, "{}", fl),
121 TokenKind::String(s) => write!(f, "\"{}\"", s),
122 TokenKind::Identifier(s) => write!(f, "{}", s),
123 TokenKind::Let => write!(f, "let"),
124 TokenKind::If => write!(f, "if"),
125 TokenKind::Else => write!(f, "else"),
126 TokenKind::For => write!(f, "for"),
127 TokenKind::Fn => write!(f, "fn"),
128 TokenKind::Return => write!(f, "return"),
129 TokenKind::True => write!(f, "true"),
130 TokenKind::False => write!(f, "false"),
131 TokenKind::Null => write!(f, "null"),
132 TokenKind::Plus => write!(f, "+"),
133 TokenKind::Minus => write!(f, "-"),
134 TokenKind::Star => write!(f, "*"),
135 TokenKind::Slash => write!(f, "/"),
136 TokenKind::Percent => write!(f, "%"),
137 TokenKind::Assign => write!(f, "="),
138 TokenKind::Equal => write!(f, "=="),
139 TokenKind::NotEqual => write!(f, "!="),
140 TokenKind::LessThan => write!(f, "<"),
141 TokenKind::LessEqual => write!(f, "<="),
142 TokenKind::GreaterThan => write!(f, ">"),
143 TokenKind::GreaterEqual => write!(f, ">="),
144 TokenKind::Bang => write!(f, "!"),
145 TokenKind::And => write!(f, "&&"),
146 TokenKind::Or => write!(f, "||"),
147 TokenKind::LeftParen => write!(f, "("),
148 TokenKind::RightParen => write!(f, ")"),
149 TokenKind::LeftBrace => write!(f, "{{"),
150 TokenKind::RightBrace => write!(f, "}}"),
151 TokenKind::LeftBracket => write!(f, "["),
152 TokenKind::RightBracket => write!(f, "]"),
153 TokenKind::Comma => write!(f, ","),
154 TokenKind::Semicolon => write!(f, ";"),
155 TokenKind::Colon => write!(f, ":"),
156 TokenKind::Dot => write!(f, "."),
157 TokenKind::Eof => write!(f, "EOF"),
158 }
159 }
160}
161
/// Hand-written scanner that walks a source string one character at a time.
pub struct Lexer<'a> {
    /// Complete input text; identifier and number lexemes are sliced out of it.
    source: &'a str,
    /// Cursor over `source` yielding `(byte_index, char)` pairs, with lookahead.
    chars: std::iter::Peekable<std::str::CharIndices<'a>>,
    /// Line of the next unread character; starts at 1.
    line: usize,
    /// Column of the next unread character; starts at 1 and counts chars, not bytes.
    column: usize,
    /// Byte offset just past the most recently consumed character.
    offset: usize,
    /// Set to `true` once the end of input has been reported.
    at_eof: bool,
}
177
178impl<'a> Lexer<'a> {
179 pub fn new(source: &'a str) -> Self {
181 Self {
182 source,
183 chars: source.char_indices().peekable(),
184 line: 1,
185 column: 1,
186 offset: 0,
187 at_eof: false,
188 }
189 }
190
191 fn position(&self) -> Position {
193 Position::new(self.line, self.column, self.offset)
194 }
195
196 fn advance(&mut self) -> Option<char> {
198 if let Some((idx, ch)) = self.chars.next() {
199 self.offset = idx + ch.len_utf8();
200 if ch == '\n' {
201 self.line += 1;
202 self.column = 1;
203 } else {
204 self.column += 1;
205 }
206 Some(ch)
207 } else {
208 None
209 }
210 }
211
212 fn peek(&mut self) -> Option<char> {
214 self.chars.peek().map(|(_, ch)| *ch)
215 }
216
217 fn peek_next(&self) -> Option<char> {
219 let mut iter = self.chars.clone();
220 iter.next(); iter.peek().map(|(_, ch)| *ch)
222 }
223
224 fn skip_whitespace_and_comments(&mut self) {
226 loop {
227 while let Some(ch) = self.peek() {
229 if ch.is_whitespace() {
230 self.advance();
231 } else {
232 break;
233 }
234 }
235
236 if self.peek() == Some('/') && self.peek_next() == Some('/') {
238 self.advance(); self.advance(); while let Some(ch) = self.peek() {
242 if ch == '\n' {
243 break;
244 }
245 self.advance();
246 }
247 continue; }
249
250 break;
251 }
252 }
253
254 fn scan_identifier(&mut self, first_char: char, start_pos: Position) -> Token {
256 let start_offset = self.offset - first_char.len_utf8();
257
258 while let Some(ch) = self.peek() {
259 if ch.is_alphanumeric() || ch == '_' {
260 self.advance();
261 } else {
262 break;
263 }
264 }
265
266 let text = &self.source[start_offset..self.offset];
267 let kind = match text {
268 "let" => TokenKind::Let,
269 "if" => TokenKind::If,
270 "else" => TokenKind::Else,
271 "for" => TokenKind::For,
272 "fn" => TokenKind::Fn,
273 "return" => TokenKind::Return,
274 "true" => TokenKind::True,
275 "false" => TokenKind::False,
276 "null" => TokenKind::Null,
277 _ => TokenKind::Identifier(text.to_string()),
278 };
279
280 Token::new(kind, start_pos)
281 }
282
283 fn scan_number(&mut self, first_char: char, start_pos: Position) -> Result<Token, LexError> {
285 let start_offset = self.offset - first_char.len_utf8();
286 let mut has_decimal = false;
287
288 while let Some(ch) = self.peek() {
290 if ch.is_ascii_digit() {
291 self.advance();
292 } else if ch == '.' && !has_decimal {
293 if let Some(next_ch) = self.peek_next() {
295 if next_ch.is_ascii_digit() {
296 has_decimal = true;
297 self.advance(); } else {
299 break;
301 }
302 } else {
303 break;
305 }
306 } else {
307 break;
308 }
309 }
310
311 let text = &self.source[start_offset..self.offset];
312
313 if has_decimal {
314 match text.parse::<f64>() {
316 Ok(value) => Ok(Token::new(TokenKind::Float(value), start_pos)),
317 Err(_) => Err(LexError::InvalidNumber(start_pos)),
318 }
319 } else {
320 match text.parse::<i64>() {
322 Ok(value) => Ok(Token::new(TokenKind::Integer(value), start_pos)),
323 Err(_) => Err(LexError::InvalidNumber(start_pos)),
324 }
325 }
326 }
327
328 fn scan_string(&mut self, start_pos: Position) -> Result<Token, LexError> {
330 let mut value = String::new();
331
332 loop {
333 match self.advance() {
334 Some('"') => break,
335 Some('\\') => {
336 match self.advance() {
338 Some('n') => value.push('\n'),
339 Some('t') => value.push('\t'),
340 Some('r') => value.push('\r'),
341 Some('\\') => value.push('\\'),
342 Some('"') => value.push('"'),
343 Some(ch) => return Err(LexError::InvalidEscape(ch, self.position())),
344 None => return Err(LexError::UnterminatedString(start_pos)),
345 }
346 }
347 Some(ch) => value.push(ch),
348 None => return Err(LexError::UnterminatedString(start_pos)),
349 }
350 }
351
352 Ok(Token::new(TokenKind::String(value), start_pos))
353 }
354
355 pub fn next_token(&mut self) -> Result<Token, LexError> {
357 self.skip_whitespace_and_comments();
358
359 let pos = self.position();
360
361 let Some(ch) = self.advance() else {
362 self.at_eof = true;
363 return Ok(Token::new(TokenKind::Eof, pos));
364 };
365
366 let kind = match ch {
367 '+' => TokenKind::Plus,
369 '-' => TokenKind::Minus,
370 '*' => TokenKind::Star,
371 '/' => TokenKind::Slash,
372 '%' => TokenKind::Percent,
373 '(' => TokenKind::LeftParen,
374 ')' => TokenKind::RightParen,
375 '{' => TokenKind::LeftBrace,
376 '}' => TokenKind::RightBrace,
377 '[' => TokenKind::LeftBracket,
378 ']' => TokenKind::RightBracket,
379 ',' => TokenKind::Comma,
380 ';' => TokenKind::Semicolon,
381 ':' => TokenKind::Colon,
382 '.' => TokenKind::Dot,
383
384 '=' => {
386 if self.peek() == Some('=') {
387 self.advance();
388 TokenKind::Equal
389 } else {
390 TokenKind::Assign
391 }
392 }
393 '!' => {
394 if self.peek() == Some('=') {
395 self.advance();
396 TokenKind::NotEqual
397 } else {
398 TokenKind::Bang
399 }
400 }
401 '<' => {
402 if self.peek() == Some('=') {
403 self.advance();
404 TokenKind::LessEqual
405 } else {
406 TokenKind::LessThan
407 }
408 }
409 '>' => {
410 if self.peek() == Some('=') {
411 self.advance();
412 TokenKind::GreaterEqual
413 } else {
414 TokenKind::GreaterThan
415 }
416 }
417 '&' => {
418 if self.peek() == Some('&') {
419 self.advance();
420 TokenKind::And
421 } else {
422 return Err(LexError::UnexpectedCharacter(ch, pos));
423 }
424 }
425 '|' => {
426 if self.peek() == Some('|') {
427 self.advance();
428 TokenKind::Or
429 } else {
430 return Err(LexError::UnexpectedCharacter(ch, pos));
431 }
432 }
433
434 '"' => return self.scan_string(pos),
436
437 ch if ch.is_ascii_digit() => return self.scan_number(ch, pos),
439
440 ch if ch.is_alphabetic() || ch == '_' => {
442 return Ok(self.scan_identifier(ch, pos));
443 }
444
445 _ => return Err(LexError::UnexpectedCharacter(ch, pos)),
447 };
448
449 Ok(Token::new(kind, pos))
450 }
451
452 pub fn tokenize(&mut self) -> Result<Vec<Token>, LexError> {
454 let mut tokens = Vec::new();
455 loop {
456 let token = self.next_token()?;
457 let is_eof = token.kind == TokenKind::Eof;
458 tokens.push(token);
459 if is_eof {
460 break;
461 }
462 }
463 Ok(tokens)
464 }
465}
466
#[cfg(test)]
mod tests {
    use super::*;

    /// Lexes `source` to completion and returns just the token kinds,
    /// including the trailing `Eof`.
    fn kinds(source: &str) -> Vec<TokenKind> {
        Lexer::new(source)
            .tokenize()
            .unwrap()
            .into_iter()
            .map(|token| token.kind)
            .collect()
    }

    /// Shorthand for an identifier token kind.
    fn ident(name: &str) -> TokenKind {
        TokenKind::Identifier(name.to_string())
    }

    /// Shorthand for a string-literal token kind.
    fn string(text: &str) -> TokenKind {
        TokenKind::String(text.to_string())
    }

    #[test]
    fn test_empty_source() {
        let mut lexer = Lexer::new("");
        let token = lexer.next_token().unwrap();
        assert_eq!(token.kind, TokenKind::Eof);
    }

    #[test]
    fn test_eof_is_repeatable() {
        let mut lexer = Lexer::new("x");
        assert_eq!(lexer.next_token().unwrap().kind, ident("x"));
        assert_eq!(lexer.next_token().unwrap().kind, TokenKind::Eof);
        assert_eq!(lexer.next_token().unwrap().kind, TokenKind::Eof);
    }

    #[test]
    fn test_single_tokens() {
        assert_eq!(
            kinds("+ - * / % ( ) { } , ;"),
            vec![
                TokenKind::Plus,
                TokenKind::Minus,
                TokenKind::Star,
                TokenKind::Slash,
                TokenKind::Percent,
                TokenKind::LeftParen,
                TokenKind::RightParen,
                TokenKind::LeftBrace,
                TokenKind::RightBrace,
                TokenKind::Comma,
                TokenKind::Semicolon,
                TokenKind::Eof,
            ]
        );
    }

    #[test]
    fn test_brackets_and_colon() {
        assert_eq!(
            kinds("[ ] :"),
            vec![
                TokenKind::LeftBracket,
                TokenKind::RightBracket,
                TokenKind::Colon,
                TokenKind::Eof,
            ]
        );
    }

    #[test]
    fn test_comparison_operators() {
        assert_eq!(
            kinds("= == != < <= > >="),
            vec![
                TokenKind::Assign,
                TokenKind::Equal,
                TokenKind::NotEqual,
                TokenKind::LessThan,
                TokenKind::LessEqual,
                TokenKind::GreaterThan,
                TokenKind::GreaterEqual,
                TokenKind::Eof,
            ]
        );
    }

    #[test]
    fn test_logical_operators() {
        assert_eq!(
            kinds("! && ||"),
            vec![TokenKind::Bang, TokenKind::And, TokenKind::Or, TokenKind::Eof]
        );
    }

    #[test]
    fn test_lone_ampersand_and_pipe() {
        assert!(matches!(
            Lexer::new("&").next_token(),
            Err(LexError::UnexpectedCharacter('&', _))
        ));
        assert!(matches!(
            Lexer::new("|").next_token(),
            Err(LexError::UnexpectedCharacter('|', _))
        ));
    }

    #[test]
    fn test_keywords() {
        assert_eq!(
            kinds("let if else for fn return true false null"),
            vec![
                TokenKind::Let,
                TokenKind::If,
                TokenKind::Else,
                TokenKind::For,
                TokenKind::Fn,
                TokenKind::Return,
                TokenKind::True,
                TokenKind::False,
                TokenKind::Null,
                TokenKind::Eof,
            ]
        );
    }

    #[test]
    fn test_identifier() {
        assert_eq!(
            kinds("foo bar_123 _test"),
            vec![ident("foo"), ident("bar_123"), ident("_test"), TokenKind::Eof]
        );
    }

    #[test]
    fn test_integer() {
        assert_eq!(
            kinds("42 0 12345"),
            vec![
                TokenKind::Integer(42),
                TokenKind::Integer(0),
                TokenKind::Integer(12345),
                TokenKind::Eof,
            ]
        );
    }

    #[test]
    fn test_integer_overflow() {
        let result = Lexer::new("99999999999999999999").next_token();
        assert!(matches!(result, Err(LexError::InvalidNumber(_))));
    }

    #[test]
    fn test_float() {
        assert_eq!(
            kinds("3.14 0.5"),
            vec![TokenKind::Float(3.14), TokenKind::Float(0.5), TokenKind::Eof]
        );
        // A dot only joins the number when a digit follows it.
        assert_eq!(
            kinds("1.foo"),
            vec![TokenKind::Integer(1), TokenKind::Dot, ident("foo"), TokenKind::Eof]
        );
        // A second dot ends the literal.
        assert_eq!(
            kinds("1.5.2"),
            vec![TokenKind::Float(1.5), TokenKind::Dot, TokenKind::Integer(2), TokenKind::Eof]
        );
    }

    #[test]
    fn test_string() {
        assert_eq!(
            kinds(r#""hello" "world" "with\nescapes""#),
            vec![string("hello"), string("world"), string("with\nescapes"), TokenKind::Eof]
        );
        assert_eq!(kinds(r#""a\tb\\c""#), vec![string("a\tb\\c"), TokenKind::Eof]);
    }

    #[test]
    fn test_invalid_escape() {
        let result = Lexer::new(r#""bad\q""#).next_token();
        assert!(matches!(result, Err(LexError::InvalidEscape('q', _))));
    }

    #[test]
    fn test_comments() {
        let tokens = kinds("let x = 10; // this is a comment\nlet y = 20;");
        assert_eq!(tokens.len(), 11);
        assert_eq!(tokens[0], TokenKind::Let);
        assert_eq!(tokens[5], TokenKind::Let);
        // A comment may also run to the end of input without a newline.
        assert_eq!(kinds("1 // trailing"), vec![TokenKind::Integer(1), TokenKind::Eof]);
    }

    #[test]
    fn test_position_tracking() {
        let mut lexer = Lexer::new("let x\ny");
        let tokens = lexer.tokenize().unwrap();
        assert_eq!(tokens[0].position.line, 1);
        assert_eq!(tokens[0].position.column, 1);
        assert_eq!(tokens[1].position.line, 1);
        assert_eq!(tokens[1].position.column, 5);
        assert_eq!(tokens[2].position.line, 2);
        assert_eq!(tokens[2].position.column, 1);
    }

    #[test]
    fn test_unterminated_string() {
        let mut lexer = Lexer::new(r#""hello"#);
        let result = lexer.next_token();
        assert!(matches!(result, Err(LexError::UnterminatedString(_))));
    }

    #[test]
    fn test_unexpected_character() {
        let mut lexer = Lexer::new("@");
        let result = lexer.next_token();
        assert!(matches!(result, Err(LexError::UnexpectedCharacter('@', _))));
    }

    #[test]
    fn test_dot_token() {
        assert_eq!(kinds("."), vec![TokenKind::Dot, TokenKind::Eof]);
    }

    #[test]
    fn test_method_call_tokens() {
        assert_eq!(
            kinds("foo.bar()"),
            vec![
                ident("foo"),
                TokenKind::Dot,
                ident("bar"),
                TokenKind::LeftParen,
                TokenKind::RightParen,
                TokenKind::Eof,
            ]
        );
    }
}