1use crate::token::Token;
7
/// Character-at-a-time scanner that turns source text into [`Token`]s.
///
/// The lexer keeps exactly one character of lookahead in `ch`; all position
/// bookkeeping below describes that character, not the token most recently
/// returned.
pub struct Lexer {
    /// The complete source text, stored as `char`s so it can be indexed and
    /// sliced by character position.
    input: Vec<char>,

    /// Index into `input` of the character currently held in `ch`.
    position: usize,

    /// Index into `input` of the character that will be loaded next.
    read_position: usize,

    /// The character under examination; `'\0'` stands for end of input.
    ch: char,

    /// Line counter. Starts at 1 and is advanced whenever a `'\n'` is loaded
    /// into `ch`.
    line: usize,

    /// Column counter. Reset to 0 when a `'\n'` is loaded and incremented for
    /// every other character loaded.
    column: usize,

    /// Whether spaces, tabs or carriage returns were skipped immediately
    /// before the most recently produced token.
    had_whitespace_before_token: bool,
}
18
19impl Lexer {
20 pub fn new(input: &str) -> Self {
22 let mut lexer = Lexer {
23 input: input.chars().collect(),
24 position: 0,
25 read_position: 0,
26 ch: '\0',
27 line: 1,
28 column: 0,
29 had_whitespace_before_token: false,
30 };
31 lexer.read_char(); lexer
33 }
34
35 pub fn line(&self) -> usize {
37 self.line
38 }
39
40 pub fn column(&self) -> usize {
42 self.column
43 }
44
45 pub fn had_whitespace(&self) -> bool {
47 self.had_whitespace_before_token
48 }
49
50 fn read_char(&mut self) {
52 if self.read_position >= self.input.len() {
53 self.ch = '\0'; } else {
55 self.ch = self.input[self.read_position];
56 }
57
58 if self.ch == '\n' {
60 self.line += 1;
61 self.column = 0;
62 } else {
63 self.column += 1;
64 }
65
66 self.position = self.read_position;
67 self.read_position += 1;
68 }
69
70 fn peek_char(&self) -> char {
72 if self.read_position >= self.input.len() {
73 '\0'
74 } else {
75 self.input[self.read_position]
76 }
77 }
78
79 fn peek_char_n(&self, n: usize) -> char {
81 let pos = self.position + n;
82 if pos >= self.input.len() {
83 '\0'
84 } else {
85 self.input[pos]
86 }
87 }
88
89 pub fn next_token(&mut self) -> Token {
91 let had_ws = self.skip_whitespace();
92 self.had_whitespace_before_token = had_ws;
93
94 let token = match self.ch {
95 '+' => Token::Plus,
97 '-' => {
98 if self.peek_char() == '>' {
99 self.read_char();
100 Token::Arrow
101 } else {
102 Token::Minus
103 }
104 }
105 '*' => Token::Multiply,
106 '/' => {
107 if self.peek_char() == '/' {
109 self.skip_line_comment();
110 return self.next_token();
111 } else if self.peek_char() == '*' {
112 self.skip_block_comment();
113 return self.next_token();
114 } else {
115 Token::Divide
116 }
117 }
118 '%' => Token::Modulo,
119
120 '=' => {
122 if self.peek_char() == '=' {
123 self.read_char();
124 Token::Equal
125 } else {
126 Token::Assign
127 }
128 }
129 '!' => {
130 if self.peek_char() == '=' {
131 self.read_char();
132 Token::NotEqual
133 } else {
134 Token::Not
135 }
136 }
137 '<' => {
138 if self.peek_char() == '=' {
139 self.read_char();
140 Token::LessEqual
141 } else {
142 Token::Less
143 }
144 }
145 '>' => {
146 if self.peek_char() == '=' {
147 self.read_char();
148 Token::GreaterEqual
149 } else {
150 Token::Greater
151 }
152 }
153 '&' => {
154 if self.peek_char() == '&' {
155 self.read_char();
156 Token::And
157 } else {
158 Token::Illegal('&')
159 }
160 }
161 '|' => {
162 if self.peek_char() == '|' {
163 self.read_char();
164 Token::Or
165 } else {
166 Token::Illegal('|')
167 }
168 }
169
170 '(' => Token::LeftParen,
172 ')' => Token::RightParen,
173 '{' => Token::LeftBrace,
174 '}' => Token::RightBrace,
175 '[' => Token::LeftBracket,
176 ']' => Token::RightBracket,
177 ',' => Token::Comma,
178 ':' => Token::Colon,
179 ';' => Token::Semicolon,
180
181 '"' => {
183 if self.peek_char() == '"' && self.peek_char_n(2) == '"' {
185 return self.read_multiline_string();
186 } else {
187 return self.read_string();
188 }
189 }
190
191 '\n' => Token::Newline,
193
194 '\0' => Token::EOF,
196
197 _ => {
199 if self.ch.is_alphabetic() || self.ch == '_' {
200 return self.read_identifier();
201 } else if self.ch.is_numeric() {
202 return self.read_number();
203 } else {
204 Token::Illegal(self.ch)
205 }
206 }
207 };
208
209 self.read_char();
210 token
211 }
212
213 fn skip_whitespace(&mut self) -> bool {
216 let mut skipped = false;
217 while self.ch == ' ' || self.ch == '\t' || self.ch == '\r' {
218 skipped = true;
219 self.read_char();
220 }
221 skipped
222 }
223
224 fn skip_line_comment(&mut self) {
226 while self.ch != '\n' && self.ch != '\0' {
227 self.read_char();
228 }
229 }
230
231 fn skip_block_comment(&mut self) {
233 self.read_char(); self.read_char(); while !(self.ch == '*' && self.peek_char() == '/') && self.ch != '\0' {
237 if self.ch == '\n' {
238 self.line += 1;
239 self.column = 0;
240 }
241 self.read_char();
242 }
243
244 if self.ch != '\0' {
245 self.read_char(); self.read_char(); }
248 }
249
250 fn read_identifier(&mut self) -> Token {
252 let start = self.position;
253
254 while self.ch.is_alphanumeric() || self.ch == '_' {
256 self.read_char();
257 }
258
259 let ident: String = self.input[start..self.position].iter().collect();
260 Token::lookup_keyword(&ident)
261 }
262
263 fn read_number(&mut self) -> Token {
265 let start = self.position;
266 let mut has_dot = false;
267
268 while self.ch.is_numeric() || (self.ch == '.' && !has_dot) {
269 if self.ch == '.' {
270 if !self.peek_char().is_numeric() {
272 break;
273 }
274 has_dot = true;
275 }
276 self.read_char();
277 }
278
279 let num_str: String = self.input[start..self.position].iter().collect();
280
281 if !has_dot && num_str.len() > 15 {
283 return Token::BigInteger(num_str);
284 }
285
286 match num_str.parse::<f64>() {
287 Ok(num) => Token::Number(num),
288 Err(_) => Token::Illegal('0'), }
290 }
291
292 fn read_string(&mut self) -> Token {
294 self.read_char(); let start = self.position;
296
297 while self.ch != '"' && self.ch != '\0' {
298 if self.ch == '\\' {
300 self.read_char(); if self.ch != '\0' {
302 self.read_char(); }
304 } else {
305 if self.ch == '\n' {
306 self.line += 1;
307 self.column = 0;
308 }
309 self.read_char();
310 }
311 }
312
313 if self.ch == '\0' {
314 return Token::Illegal('"'); }
316
317 let string: String = self.input[start..self.position].iter().collect();
318 self.read_char(); Token::String(self.process_escapes(&string))
322 }
323
324 fn read_multiline_string(&mut self) -> Token {
326 self.read_char(); self.read_char(); self.read_char(); let start = self.position;
332
333 loop {
335 if self.ch == '\0' {
336 return Token::Illegal('"'); }
338
339 if self.ch == '"' && self.peek_char() == '"' && self.peek_char_n(2) == '"' {
341 let string: String = self.input[start..self.position].iter().collect();
342
343 self.read_char(); self.read_char(); self.read_char(); return Token::String(self.process_escapes(&string));
350 }
351
352 if self.ch == '\n' {
354 self.line += 1;
355 self.column = 0;
356 }
357
358 self.read_char();
359 }
360 }
361
362 fn process_escapes(&self, s: &str) -> String {
364 let mut result = String::new();
365 let mut chars = s.chars();
366
367 while let Some(ch) = chars.next() {
368 if ch == '\\' {
369 match chars.next() {
370 Some('n') => result.push('\n'),
371 Some('t') => result.push('\t'),
372 Some('r') => result.push('\r'),
373 Some('\\') => result.push('\\'),
374 Some('"') => result.push('"'),
375 Some(c) => {
376 result.push('\\');
377 result.push(c);
378 }
379 None => result.push('\\'),
380 }
381 } else {
382 result.push(ch);
383 }
384 }
385
386 result
387 }
388}
389
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for an identifier token.
    fn ident(name: &str) -> Token {
        Token::Identifier(name.to_string())
    }

    /// Lexes `source` and checks that the first `expected.len()` tokens are
    /// exactly `expected`, in order. Tokens after that are not inspected.
    fn assert_lexes(source: &str, expected: &[Token]) {
        let mut lexer = Lexer::new(source);
        for (index, want) in expected.iter().enumerate() {
            let got = lexer.next_token();
            assert_eq!(&got, want, "token #{} of {:?}", index, source);
        }
    }

    #[test]
    fn test_basic_tokens() {
        assert_lexes(
            "Set X 10",
            &[Token::Set, ident("X"), Token::Number(10.0), Token::EOF],
        );
    }

    #[test]
    fn test_operators() {
        assert_lexes(
            "+ - * / % == != < <= > >= && || !",
            &[
                Token::Plus,
                Token::Minus,
                Token::Multiply,
                Token::Divide,
                Token::Modulo,
                Token::Equal,
                Token::NotEqual,
                Token::Less,
                Token::LessEqual,
                Token::Greater,
                Token::GreaterEqual,
                Token::And,
                Token::Or,
                Token::Not,
                Token::EOF,
            ],
        );
    }

    #[test]
    fn test_string_literal() {
        assert_lexes(
            r#"Set MSG "Hello World""#,
            &[
                Token::Set,
                ident("MSG"),
                Token::String("Hello World".to_string()),
                Token::EOF,
            ],
        );
    }

    #[test]
    fn test_string_with_escapes() {
        assert_lexes(
            r#""Hello\nWorld\t!""#,
            &[Token::String("Hello\nWorld\t!".to_string())],
        );
    }

    #[test]
    fn test_numbers() {
        assert_lexes(
            "123 45.67 0.5",
            &[
                Token::Number(123.0),
                Token::Number(45.67),
                Token::Number(0.5),
                Token::EOF,
            ],
        );
    }

    #[test]
    fn test_keywords() {
        assert_lexes(
            "Set Func If Else While For Return True False Null",
            &[
                Token::Set,
                Token::Func,
                Token::If,
                Token::Else,
                Token::While,
                Token::For,
                Token::Return,
                Token::Boolean(true),
                Token::Boolean(false),
                Token::Null,
                Token::EOF,
            ],
        );
    }

    #[test]
    fn test_identifiers() {
        assert_lexes(
            "USER_NAME CALCULATE_TOTAL MY_VAR",
            &[
                ident("USER_NAME"),
                ident("CALCULATE_TOTAL"),
                ident("MY_VAR"),
                Token::EOF,
            ],
        );
    }

    #[test]
    fn test_delimiters() {
        assert_lexes(
            "( ) { } [ ] , : ;",
            &[
                Token::LeftParen,
                Token::RightParen,
                Token::LeftBrace,
                Token::RightBrace,
                Token::LeftBracket,
                Token::RightBracket,
                Token::Comma,
                Token::Colon,
                Token::Semicolon,
                Token::EOF,
            ],
        );
    }

    #[test]
    fn test_line_comment() {
        // The comment disappears, but the newline that ends it is still a token.
        assert_lexes(
            "Set X 10 // This is a comment\nSet Y 20",
            &[
                Token::Set,
                ident("X"),
                Token::Number(10.0),
                Token::Newline,
                Token::Set,
                ident("Y"),
                Token::Number(20.0),
            ],
        );
    }

    #[test]
    fn test_block_comment() {
        assert_lexes(
            "Set X /* block comment */ 10",
            &[Token::Set, ident("X"), Token::Number(10.0)],
        );
    }

    #[test]
    fn test_newlines() {
        let mut lexer = Lexer::new("Set X 10\nSet Y 20");

        let first_line = [
            Token::Set,
            ident("X"),
            Token::Number(10.0),
            Token::Newline,
        ];
        for want in first_line.iter() {
            assert_eq!(&lexer.next_token(), want);
        }

        // Once the newline token has been produced the lexer is on line 2.
        assert_eq!(lexer.line(), 2);
        assert_eq!(lexer.next_token(), Token::Set);
    }

    #[test]
    fn test_complex_expression() {
        let input = r#"
            Func ADD (A, B) {
                Return (A + B)
            }
        "#;

        assert_lexes(
            input,
            &[
                Token::Newline,
                Token::Func,
                ident("ADD"),
                Token::LeftParen,
                ident("A"),
                Token::Comma,
                ident("B"),
                Token::RightParen,
                Token::LeftBrace,
                Token::Newline,
                Token::Return,
                Token::LeftParen,
                ident("A"),
                Token::Plus,
                ident("B"),
                Token::RightParen,
                Token::Newline,
                Token::RightBrace,
            ],
        );
    }
}