1use crate::token::Token;
7
/// Cursor state for scanning source text one character at a time.
pub struct Lexer {
    /// The source text, stored as individual `char`s so it can be indexed.
    input: Vec<char>,
    /// Index into `input` of the character currently held in `ch`.
    position: usize,
    /// Index into `input` of the character that will be loaded next.
    read_position: usize,
    /// The character under examination; `'\0'` is used as the end-of-input marker.
    ch: char,
    /// Line counter; initialised to 1 and bumped whenever a `'\n'` is loaded.
    line: usize,
    /// Column counter; reset to 0 when a `'\n'` is loaded, otherwise
    /// incremented for each character loaded.
    column: usize,
    /// Whether spaces, tabs or carriage returns were skipped ahead of the
    /// most recently produced token.
    had_whitespace_before_token: bool,
}
18
19impl Lexer {
20 pub fn new(input: &str) -> Self {
22 let mut lexer = Lexer {
23 input: input.chars().collect(),
24 position: 0,
25 read_position: 0,
26 ch: '\0',
27 line: 1,
28 column: 0,
29 had_whitespace_before_token: false,
30 };
31 lexer.read_char(); lexer
33 }
34
35 pub fn line(&self) -> usize {
37 self.line
38 }
39
40 pub fn column(&self) -> usize {
42 self.column
43 }
44
45 pub fn had_whitespace(&self) -> bool {
47 self.had_whitespace_before_token
48 }
49
50 fn read_char(&mut self) {
52 if self.read_position >= self.input.len() {
53 self.ch = '\0'; } else {
55 self.ch = self.input[self.read_position];
56 }
57
58 if self.ch == '\n' {
60 self.line += 1;
61 self.column = 0;
62 } else {
63 self.column += 1;
64 }
65
66 self.position = self.read_position;
67 self.read_position += 1;
68 }
69
70 fn peek_char(&self) -> char {
72 if self.read_position >= self.input.len() {
73 '\0'
74 } else {
75 self.input[self.read_position]
76 }
77 }
78
79 pub fn next_token(&mut self) -> Token {
81 let had_ws = self.skip_whitespace();
82 self.had_whitespace_before_token = had_ws;
83
84 let token = match self.ch {
85 '+' => Token::Plus,
87 '-' => {
88 if self.peek_char() == '>' {
89 self.read_char();
90 Token::Arrow
91 } else {
92 Token::Minus
93 }
94 }
95 '*' => Token::Multiply,
96 '/' => {
97 if self.peek_char() == '/' {
99 self.skip_line_comment();
100 return self.next_token();
101 } else if self.peek_char() == '*' {
102 self.skip_block_comment();
103 return self.next_token();
104 } else {
105 Token::Divide
106 }
107 }
108 '%' => Token::Modulo,
109
110 '=' => {
112 if self.peek_char() == '=' {
113 self.read_char();
114 Token::Equal
115 } else {
116 Token::Assign
117 }
118 }
119 '!' => {
120 if self.peek_char() == '=' {
121 self.read_char();
122 Token::NotEqual
123 } else {
124 Token::Not
125 }
126 }
127 '<' => {
128 if self.peek_char() == '=' {
129 self.read_char();
130 Token::LessEqual
131 } else {
132 Token::Less
133 }
134 }
135 '>' => {
136 if self.peek_char() == '=' {
137 self.read_char();
138 Token::GreaterEqual
139 } else {
140 Token::Greater
141 }
142 }
143 '&' => {
144 if self.peek_char() == '&' {
145 self.read_char();
146 Token::And
147 } else {
148 Token::Illegal('&')
149 }
150 }
151 '|' => {
152 if self.peek_char() == '|' {
153 self.read_char();
154 Token::Or
155 } else {
156 Token::Illegal('|')
157 }
158 }
159
160 '(' => Token::LeftParen,
162 ')' => Token::RightParen,
163 '{' => Token::LeftBrace,
164 '}' => Token::RightBrace,
165 '[' => Token::LeftBracket,
166 ']' => Token::RightBracket,
167 ',' => Token::Comma,
168 ':' => Token::Colon,
169 ';' => Token::Semicolon,
170
171 '"' => return self.read_string(),
173
174 '\n' => Token::Newline,
176
177 '\0' => Token::EOF,
179
180 _ => {
182 if self.ch.is_alphabetic() || self.ch == '_' {
183 return self.read_identifier();
184 } else if self.ch.is_numeric() {
185 return self.read_number();
186 } else {
187 Token::Illegal(self.ch)
188 }
189 }
190 };
191
192 self.read_char();
193 token
194 }
195
196 fn skip_whitespace(&mut self) -> bool {
199 let mut skipped = false;
200 while self.ch == ' ' || self.ch == '\t' || self.ch == '\r' {
201 skipped = true;
202 self.read_char();
203 }
204 skipped
205 }
206
207 fn skip_line_comment(&mut self) {
209 while self.ch != '\n' && self.ch != '\0' {
210 self.read_char();
211 }
212 }
213
214 fn skip_block_comment(&mut self) {
216 self.read_char(); self.read_char(); while !(self.ch == '*' && self.peek_char() == '/') && self.ch != '\0' {
220 if self.ch == '\n' {
221 self.line += 1;
222 self.column = 0;
223 }
224 self.read_char();
225 }
226
227 if self.ch != '\0' {
228 self.read_char(); self.read_char(); }
231 }
232
233 fn read_identifier(&mut self) -> Token {
235 let start = self.position;
236
237 while self.ch.is_alphanumeric() || self.ch == '_' {
239 self.read_char();
240 }
241
242 let ident: String = self.input[start..self.position].iter().collect();
243 Token::lookup_keyword(&ident)
244 }
245
246 fn read_number(&mut self) -> Token {
248 let start = self.position;
249 let mut has_dot = false;
250
251 while self.ch.is_numeric() || (self.ch == '.' && !has_dot) {
252 if self.ch == '.' {
253 if !self.peek_char().is_numeric() {
255 break;
256 }
257 has_dot = true;
258 }
259 self.read_char();
260 }
261
262 let num_str: String = self.input[start..self.position].iter().collect();
263
264 if !has_dot && num_str.len() > 15 {
266 return Token::BigInteger(num_str);
267 }
268
269 match num_str.parse::<f64>() {
270 Ok(num) => Token::Number(num),
271 Err(_) => Token::Illegal('0'), }
273 }
274
275 fn read_string(&mut self) -> Token {
277 self.read_char(); let start = self.position;
279
280 while self.ch != '"' && self.ch != '\0' {
281 if self.ch == '\\' {
283 self.read_char(); if self.ch != '\0' {
285 self.read_char(); }
287 } else {
288 if self.ch == '\n' {
289 self.line += 1;
290 self.column = 0;
291 }
292 self.read_char();
293 }
294 }
295
296 if self.ch == '\0' {
297 return Token::Illegal('"'); }
299
300 let string: String = self.input[start..self.position].iter().collect();
301 self.read_char(); Token::String(self.process_escapes(&string))
305 }
306
307 fn process_escapes(&self, s: &str) -> String {
309 let mut result = String::new();
310 let mut chars = s.chars();
311
312 while let Some(ch) = chars.next() {
313 if ch == '\\' {
314 match chars.next() {
315 Some('n') => result.push('\n'),
316 Some('t') => result.push('\t'),
317 Some('r') => result.push('\r'),
318 Some('\\') => result.push('\\'),
319 Some('"') => result.push('"'),
320 Some(c) => {
321 result.push('\\');
322 result.push(c);
323 }
324 None => result.push('\\'),
325 }
326 } else {
327 result.push(ch);
328 }
329 }
330
331 result
332 }
333}
334
#[cfg(test)]
mod tests {
    use super::*;

    /// Pulls one token from `lexer` per entry in `expected` and checks that
    /// each matches, in order.
    fn expect_tokens(lexer: &mut Lexer, expected: &[Token]) {
        for want in expected {
            assert_eq!(&lexer.next_token(), want);
        }
    }

    /// Shorthand for an identifier token.
    fn ident(name: &str) -> Token {
        Token::Identifier(name.to_string())
    }

    #[test]
    fn test_basic_tokens() {
        let mut lexer = Lexer::new("Set X 10");
        expect_tokens(
            &mut lexer,
            &[Token::Set, ident("X"), Token::Number(10.0), Token::EOF],
        );
    }

    #[test]
    fn test_operators() {
        let mut lexer = Lexer::new("+ - * / % == != < <= > >= && || !");
        expect_tokens(
            &mut lexer,
            &[
                Token::Plus,
                Token::Minus,
                Token::Multiply,
                Token::Divide,
                Token::Modulo,
                Token::Equal,
                Token::NotEqual,
                Token::Less,
                Token::LessEqual,
                Token::Greater,
                Token::GreaterEqual,
                Token::And,
                Token::Or,
                Token::Not,
                Token::EOF,
            ],
        );
    }

    #[test]
    fn test_string_literal() {
        let mut lexer = Lexer::new(r#"Set MSG "Hello World""#);
        expect_tokens(
            &mut lexer,
            &[
                Token::Set,
                ident("MSG"),
                Token::String("Hello World".to_string()),
                Token::EOF,
            ],
        );
    }

    #[test]
    fn test_string_with_escapes() {
        let mut lexer = Lexer::new(r#""Hello\nWorld\t!""#);
        expect_tokens(
            &mut lexer,
            &[Token::String("Hello\nWorld\t!".to_string())],
        );
    }

    #[test]
    fn test_numbers() {
        let mut lexer = Lexer::new("123 45.67 0.5");
        expect_tokens(
            &mut lexer,
            &[
                Token::Number(123.0),
                Token::Number(45.67),
                Token::Number(0.5),
                Token::EOF,
            ],
        );
    }

    #[test]
    fn test_keywords() {
        let mut lexer = Lexer::new("Set Func If Else While For Return True False Null");
        expect_tokens(
            &mut lexer,
            &[
                Token::Set,
                Token::Func,
                Token::If,
                Token::Else,
                Token::While,
                Token::For,
                Token::Return,
                Token::Boolean(true),
                Token::Boolean(false),
                Token::Null,
                Token::EOF,
            ],
        );
    }

    #[test]
    fn test_identifiers() {
        let mut lexer = Lexer::new("USER_NAME CALCULATE_TOTAL MY_VAR");
        expect_tokens(
            &mut lexer,
            &[
                ident("USER_NAME"),
                ident("CALCULATE_TOTAL"),
                ident("MY_VAR"),
                Token::EOF,
            ],
        );
    }

    #[test]
    fn test_delimiters() {
        let mut lexer = Lexer::new("( ) { } [ ] , : ;");
        expect_tokens(
            &mut lexer,
            &[
                Token::LeftParen,
                Token::RightParen,
                Token::LeftBrace,
                Token::RightBrace,
                Token::LeftBracket,
                Token::RightBracket,
                Token::Comma,
                Token::Colon,
                Token::Semicolon,
                Token::EOF,
            ],
        );
    }

    #[test]
    fn test_line_comment() {
        let mut lexer = Lexer::new("Set X 10 // This is a comment\nSet Y 20");
        expect_tokens(
            &mut lexer,
            &[
                Token::Set,
                ident("X"),
                Token::Number(10.0),
                Token::Newline,
                Token::Set,
                ident("Y"),
                Token::Number(20.0),
            ],
        );
    }

    #[test]
    fn test_block_comment() {
        let mut lexer = Lexer::new("Set X /* block comment */ 10");
        expect_tokens(
            &mut lexer,
            &[Token::Set, ident("X"), Token::Number(10.0)],
        );
    }

    #[test]
    fn test_newlines() {
        let mut lexer = Lexer::new("Set X 10\nSet Y 20");
        expect_tokens(
            &mut lexer,
            &[Token::Set, ident("X"), Token::Number(10.0), Token::Newline],
        );
        // The line counter has already advanced once the newline token is out.
        assert_eq!(lexer.line(), 2);
        expect_tokens(&mut lexer, &[Token::Set]);
    }

    #[test]
    fn test_complex_expression() {
        let input = r#"
        Func ADD (A, B) {
            Return (A + B)
        }
        "#;
        let mut lexer = Lexer::new(input);
        expect_tokens(
            &mut lexer,
            &[
                Token::Newline,
                Token::Func,
                ident("ADD"),
                Token::LeftParen,
                ident("A"),
                Token::Comma,
                ident("B"),
                Token::RightParen,
                Token::LeftBrace,
                Token::Newline,
                Token::Return,
                Token::LeftParen,
                ident("A"),
                Token::Plus,
                ident("B"),
                Token::RightParen,
                Token::Newline,
                Token::RightBrace,
            ],
        );
    }
}