1use std::fmt;
7
/// A lexical token produced by the [`Lexer`].
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
    // Literals
    Integer(i64),
    Float(f64),
    String(String),
    True,
    False,
    Null,

    // Arithmetic operators
    Plus,
    Minus,
    Star,
    Slash,
    Percent,
    Caret,

    // Assignment / comparison operators
    Equal,
    EqualEqual,
    NotEqual,
    Less,
    LessEqual,
    Greater,
    GreaterEqual,

    // Logical operators
    AndAnd,
    OrOr,
    Bang,

    // Keywords
    Let,
    In,
    If,
    Then,
    Else,
    Fn,
    Guard,

    // Temporal keywords, lexed from upper-case words (`NOW`, `SOD`, `EOT`, ...)
    Now,
    Today,
    Tomorrow,
    Yesterday,
    StartOfDay,
    EndOfDay,
    StartOfWeek,
    EndOfWeek,
    StartOfMonth,
    EndOfMonth,
    StartOfQuarter,
    EndOfQuarter,
    StartOfYear,
    EndOfYear,
    BeginningOfTime,
    EndOfTime,

    // Identifiers and single-character punctuation
    Identifier(String),
    Dot,
    Comma,
    LeftParen,
    RightParen,
    LeftBracket,
    RightBracket,
    LeftBrace,
    RightBrace,
    Colon,
    Semicolon,

    // Multi-character symbols
    Arrow,       // `=>`
    Pipe,        // `|>`
    LambdaArrow, // `~>`
    Alternative, // `?|`
    /// End of input; `tokenize` always emits this as the final token.
    Eof,
}
153
154impl fmt::Display for Token {
155 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
156 match self {
157 Token::Integer(n) => write!(f, "{}", n),
158 Token::Float(x) => write!(f, "{}", x),
159 Token::String(s) => write!(f, "'{}'", s),
160 Token::True => write!(f, "true"),
161 Token::False => write!(f, "false"),
162 Token::Null => write!(f, "null"),
163 Token::Plus => write!(f, "+"),
164 Token::Minus => write!(f, "-"),
165 Token::Star => write!(f, "*"),
166 Token::Slash => write!(f, "/"),
167 Token::Percent => write!(f, "%"),
168 Token::Caret => write!(f, "^"),
169 Token::Equal => write!(f, "="),
170 Token::EqualEqual => write!(f, "=="),
171 Token::NotEqual => write!(f, "!="),
172 Token::Less => write!(f, "<"),
173 Token::LessEqual => write!(f, "<="),
174 Token::Greater => write!(f, ">"),
175 Token::GreaterEqual => write!(f, ">="),
176 Token::AndAnd => write!(f, "&&"),
177 Token::OrOr => write!(f, "||"),
178 Token::Bang => write!(f, "!"),
179 Token::Let => write!(f, "let"),
180 Token::In => write!(f, "in"),
181 Token::If => write!(f, "if"),
182 Token::Then => write!(f, "then"),
183 Token::Else => write!(f, "else"),
184 Token::Fn => write!(f, "fn"),
185 Token::Guard => write!(f, "guard"),
186 Token::Now => write!(f, "NOW"),
187 Token::Today => write!(f, "TODAY"),
188 Token::Tomorrow => write!(f, "TOMORROW"),
189 Token::Yesterday => write!(f, "YESTERDAY"),
190 Token::StartOfDay => write!(f, "SOD"),
191 Token::EndOfDay => write!(f, "EOD"),
192 Token::StartOfWeek => write!(f, "SOW"),
193 Token::EndOfWeek => write!(f, "EOW"),
194 Token::StartOfMonth => write!(f, "SOM"),
195 Token::EndOfMonth => write!(f, "EOM"),
196 Token::StartOfQuarter => write!(f, "SOQ"),
197 Token::EndOfQuarter => write!(f, "EOQ"),
198 Token::StartOfYear => write!(f, "SOY"),
199 Token::EndOfYear => write!(f, "EOY"),
200 Token::BeginningOfTime => write!(f, "BOT"),
201 Token::EndOfTime => write!(f, "EOT"),
202 Token::Identifier(name) => write!(f, "{}", name),
203 Token::Dot => write!(f, "."),
204 Token::Comma => write!(f, ","),
205 Token::LeftParen => write!(f, "("),
206 Token::RightParen => write!(f, ")"),
207 Token::LeftBracket => write!(f, "["),
208 Token::RightBracket => write!(f, "]"),
209 Token::LeftBrace => write!(f, "{{"),
210 Token::RightBrace => write!(f, "}}"),
211 Token::Colon => write!(f, ":"),
212 Token::Semicolon => write!(f, ";"),
213 Token::Arrow => write!(f, "=>"),
214 Token::Pipe => write!(f, "|>"),
215 Token::LambdaArrow => write!(f, "~>"),
216 Token::Alternative => write!(f, "?|"),
217 Token::Eof => write!(f, "EOF"),
218 }
219 }
220}
221
/// An error produced during lexing, carrying a human-readable message
/// and the source position where the problem was detected.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LexError {
    /// Description of what went wrong (e.g. "Unterminated string literal").
    pub message: String,
    /// 1-based line number (the lexer starts counting at line 1).
    pub line: usize,
    /// 1-based column number (reset to 1 after each newline).
    pub column: usize,
}
232
233impl fmt::Display for LexError {
234 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
235 write!(
236 f,
237 "Lex error at line {}, column {}: {}",
238 self.line, self.column, self.message
239 )
240 }
241}
242
// Marker impl so `LexError` can be boxed/propagated as a `dyn Error`;
// the `Debug` derive and `Display` impl satisfy the trait's supertraits.
impl std::error::Error for LexError {}
244
/// A hand-written scanner that turns source text into [`Token`]s,
/// keeping a one-character lookahead and a 1-based line/column position
/// for error reporting.
#[derive(Debug)]
pub struct Lexer<'a> {
    /// Original input text; retained but not read after construction.
    #[allow(dead_code)]
    input: &'a str,
    /// Count of characters consumed so far (a char count, not a byte offset).
    position: usize,
    /// Current line, starting at 1.
    line: usize,
    /// Current column, starting at 1; reset to 1 after each newline.
    column: usize,
    /// Iterator over the remaining input characters.
    chars: std::str::Chars<'a>,
    /// One-character lookahead; `None` once the input is exhausted.
    current_char: Option<char>,
}
256
257impl<'a> Lexer<'a> {
258 pub fn new(input: &'a str) -> Self {
260 let mut lexer = Lexer {
261 input,
262 position: 0,
263 line: 1,
264 column: 1,
265 chars: input.chars(),
266 current_char: None,
267 };
268 lexer.current_char = lexer.chars.next();
269 lexer
270 }
271
272 fn advance(&mut self) {
274 if let Some('\n') = self.current_char {
275 self.line += 1;
276 self.column = 1;
277 } else {
278 self.column += 1;
279 }
280 self.position += 1;
281 self.current_char = self.chars.next();
282 }
283
284 fn skip_whitespace(&mut self) {
286 while let Some(ch) = self.current_char {
287 if ch.is_whitespace() {
288 self.advance();
289 } else {
290 break;
291 }
292 }
293 }
294
295 fn read_number(&mut self) -> Result<Token, LexError> {
297 let start_line = self.line;
298 let start_col = self.column;
299 let mut num_str = String::new();
300 let mut is_float = false;
301
302 while let Some(ch) = self.current_char {
304 if ch.is_ascii_digit() {
305 num_str.push(ch);
306 self.advance();
307 } else if ch == '.' && !is_float {
308 is_float = true;
310 num_str.push(ch);
311 self.advance();
312 } else {
313 break;
314 }
315 }
316
317 if is_float {
318 num_str
319 .parse::<f64>()
320 .map(Token::Float)
321 .map_err(|_| LexError {
322 message: format!("Invalid float: {}", num_str),
323 line: start_line,
324 column: start_col,
325 })
326 } else {
327 num_str
328 .parse::<i64>()
329 .map(Token::Integer)
330 .map_err(|_| LexError {
331 message: format!("Invalid integer: {}", num_str),
332 line: start_line,
333 column: start_col,
334 })
335 }
336 }
337
338 fn read_string(&mut self) -> Result<Token, LexError> {
340 let start_line = self.line;
341 let start_col = self.column;
342 let mut result = String::new();
343
344 self.advance();
346
347 while let Some(ch) = self.current_char {
348 match ch {
349 '\'' => {
350 self.advance();
351 return Ok(Token::String(result));
352 }
353 '\\' => {
354 self.advance();
355 match self.current_char {
356 Some('n') => {
357 result.push('\n');
358 self.advance();
359 }
360 Some('t') => {
361 result.push('\t');
362 self.advance();
363 }
364 Some('r') => {
365 result.push('\r');
366 self.advance();
367 }
368 Some('\\') => {
369 result.push('\\');
370 self.advance();
371 }
372 Some('\'') => {
373 result.push('\'');
374 self.advance();
375 }
376 _ => {
377 return Err(LexError {
378 message: "Invalid escape sequence".to_string(),
379 line: self.line,
380 column: self.column,
381 });
382 }
383 }
384 }
385 _ => {
386 result.push(ch);
387 self.advance();
388 }
389 }
390 }
391
392 Err(LexError {
393 message: "Unterminated string literal".to_string(),
394 line: start_line,
395 column: start_col,
396 })
397 }
398
399 fn read_identifier(&mut self) -> Token {
401 let mut ident = String::new();
402
403 while let Some(ch) = self.current_char {
404 if ch.is_alphanumeric() || ch == '_' {
405 ident.push(ch);
406 self.advance();
407 } else {
408 break;
409 }
410 }
411
412 match ident.as_str() {
413 "let" => Token::Let,
414 "in" => Token::In,
415 "if" => Token::If,
416 "then" => Token::Then,
417 "else" => Token::Else,
418 "fn" => Token::Fn,
419 "guard" => Token::Guard,
420 "true" => Token::True,
421 "false" => Token::False,
422 "null" => Token::Null,
423 "NOW" => Token::Now,
424 "TODAY" => Token::Today,
425 "TOMORROW" => Token::Tomorrow,
426 "YESTERDAY" => Token::Yesterday,
427 "SOD" => Token::StartOfDay,
428 "EOD" => Token::EndOfDay,
429 "SOW" => Token::StartOfWeek,
430 "EOW" => Token::EndOfWeek,
431 "SOM" => Token::StartOfMonth,
432 "EOM" => Token::EndOfMonth,
433 "SOQ" => Token::StartOfQuarter,
434 "EOQ" => Token::EndOfQuarter,
435 "SOY" => Token::StartOfYear,
436 "EOY" => Token::EndOfYear,
437 "BOT" => Token::BeginningOfTime,
438 "EOT" => Token::EndOfTime,
439 _ => Token::Identifier(ident),
440 }
441 }
442
443 pub fn next_token(&mut self) -> Result<Token, LexError> {
445 self.skip_whitespace();
446
447 match self.current_char {
448 None => Ok(Token::Eof),
449 Some(ch) => {
450 match ch {
451 '+' => {
452 self.advance();
453 Ok(Token::Plus)
454 }
455 '-' => {
456 self.advance();
457 Ok(Token::Minus)
460 }
461 '*' => {
462 self.advance();
463 Ok(Token::Star)
464 }
465 '/' => {
466 self.advance();
467 Ok(Token::Slash)
468 }
469 '%' => {
470 self.advance();
471 Ok(Token::Percent)
472 }
473 '^' => {
474 self.advance();
475 Ok(Token::Caret)
476 }
477 '=' => {
478 self.advance();
479 if self.current_char == Some('=') {
480 self.advance();
481 Ok(Token::EqualEqual)
482 } else if self.current_char == Some('>') {
483 self.advance();
484 Ok(Token::Arrow)
485 } else {
486 Ok(Token::Equal)
487 }
488 }
489 '!' => {
490 self.advance();
491 if self.current_char == Some('=') {
492 self.advance();
493 Ok(Token::NotEqual)
494 } else {
495 Ok(Token::Bang)
496 }
497 }
498 '<' => {
499 self.advance();
500 if self.current_char == Some('=') {
501 self.advance();
502 Ok(Token::LessEqual)
503 } else {
504 Ok(Token::Less)
505 }
506 }
507 '>' => {
508 self.advance();
509 if self.current_char == Some('=') {
510 self.advance();
511 Ok(Token::GreaterEqual)
512 } else {
513 Ok(Token::Greater)
514 }
515 }
516 '&' => {
517 self.advance();
518 if self.current_char == Some('&') {
519 self.advance();
520 Ok(Token::AndAnd)
521 } else {
522 Err(LexError {
523 message: "Unexpected '&', did you mean '&&'?".to_string(),
524 line: self.line,
525 column: self.column - 1,
526 })
527 }
528 }
529 '|' => {
530 self.advance();
531 if self.current_char == Some('|') {
532 self.advance();
533 Ok(Token::OrOr)
534 } else if self.current_char == Some('>') {
535 self.advance();
536 Ok(Token::Pipe)
537 } else {
538 Err(LexError {
539 message: "Unexpected '|', did you mean '||' or '|>'?".to_string(),
540 line: self.line,
541 column: self.column - 1,
542 })
543 }
544 }
545 '?' => {
546 self.advance();
547 if self.current_char == Some('|') {
548 self.advance();
549 Ok(Token::Alternative)
550 } else {
551 Err(LexError {
552 message: "Unexpected '?', did you mean '?|'?".to_string(),
553 line: self.line,
554 column: self.column - 1,
555 })
556 }
557 }
558 '~' => {
559 self.advance();
560 if self.current_char == Some('>') {
561 self.advance();
562 Ok(Token::LambdaArrow)
563 } else {
564 Err(LexError {
565 message: "Unexpected '~', did you mean '~>'?".to_string(),
566 line: self.line,
567 column: self.column - 1,
568 })
569 }
570 }
571 '.' => {
572 self.advance();
573 Ok(Token::Dot)
574 }
575 ',' => {
576 self.advance();
577 Ok(Token::Comma)
578 }
579 '(' => {
580 self.advance();
581 Ok(Token::LeftParen)
582 }
583 ')' => {
584 self.advance();
585 Ok(Token::RightParen)
586 }
587 '[' => {
588 self.advance();
589 Ok(Token::LeftBracket)
590 }
591 ']' => {
592 self.advance();
593 Ok(Token::RightBracket)
594 }
595 '{' => {
596 self.advance();
597 Ok(Token::LeftBrace)
598 }
599 '}' => {
600 self.advance();
601 Ok(Token::RightBrace)
602 }
603 ':' => {
604 self.advance();
605 Ok(Token::Colon)
606 }
607 ';' => {
608 self.advance();
609 Ok(Token::Semicolon)
610 }
611 '\'' => self.read_string(),
612 _ if ch.is_ascii_digit() => self.read_number(),
613 _ if ch.is_alphabetic() => Ok(self.read_identifier()),
614 _ => Err(LexError {
615 message: format!("Unexpected character: '{}'", ch),
616 line: self.line,
617 column: self.column,
618 }),
619 }
620 }
621 }
622 }
623
624 pub fn tokenize(&mut self) -> Result<Vec<Token>, LexError> {
626 let mut tokens = Vec::new();
627
628 loop {
629 let token = self.next_token()?;
630 let is_eof = token == Token::Eof;
631 tokens.push(token);
632 if is_eof {
633 break;
634 }
635 }
636
637 Ok(tokens)
638 }
639}
640
/// Unit tests for the lexer: literals, keywords, single- and
/// multi-character operators, whitespace handling, and error paths.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_integer_literal() {
        let mut lexer = Lexer::new("42");
        let token = lexer.next_token().unwrap();
        assert_eq!(token, Token::Integer(42));
    }

    #[test]
    fn test_float_literal() {
        let mut lexer = Lexer::new("3.15");
        let token = lexer.next_token().unwrap();
        assert_eq!(token, Token::Float(3.15));
    }

    #[test]
    fn test_string_literal() {
        // Strings use single quotes in this language.
        let mut lexer = Lexer::new("'hello'");
        let token = lexer.next_token().unwrap();
        assert_eq!(token, Token::String("hello".to_string()));
    }

    #[test]
    fn test_boolean_true() {
        let mut lexer = Lexer::new("true");
        let token = lexer.next_token().unwrap();
        assert_eq!(token, Token::True);
    }

    #[test]
    fn test_boolean_false() {
        let mut lexer = Lexer::new("false");
        let token = lexer.next_token().unwrap();
        assert_eq!(token, Token::False);
    }

    #[test]
    fn test_keyword_let() {
        let mut lexer = Lexer::new("let");
        let token = lexer.next_token().unwrap();
        assert_eq!(token, Token::Let);
    }

    #[test]
    fn test_keyword_if() {
        let mut lexer = Lexer::new("if");
        let token = lexer.next_token().unwrap();
        assert_eq!(token, Token::If);
    }

    #[test]
    fn test_identifier() {
        let mut lexer = Lexer::new("myVar");
        let token = lexer.next_token().unwrap();
        assert_eq!(token, Token::Identifier("myVar".to_string()));
    }

    #[test]
    fn test_plus_operator() {
        let mut lexer = Lexer::new("+");
        let token = lexer.next_token().unwrap();
        assert_eq!(token, Token::Plus);
    }

    #[test]
    fn test_equal_equal() {
        let mut lexer = Lexer::new("==");
        let token = lexer.next_token().unwrap();
        assert_eq!(token, Token::EqualEqual);
    }

    #[test]
    fn test_and_and() {
        let mut lexer = Lexer::new("&&");
        let token = lexer.next_token().unwrap();
        assert_eq!(token, Token::AndAnd);
    }

    #[test]
    fn test_pipe() {
        let mut lexer = Lexer::new("|>");
        let token = lexer.next_token().unwrap();
        assert_eq!(token, Token::Pipe);
    }

    #[test]
    fn test_lambda_arrow() {
        let mut lexer = Lexer::new("~>");
        let token = lexer.next_token().unwrap();
        assert_eq!(token, Token::LambdaArrow);
    }

    #[test]
    fn test_tokenize_simple_expr() {
        let mut lexer = Lexer::new("age >= 18");
        let tokens = lexer.tokenize().unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Identifier("age".to_string()),
                Token::GreaterEqual,
                Token::Integer(18),
                Token::Eof
            ]
        );
    }

    #[test]
    fn test_tokenize_function_call() {
        let mut lexer = Lexer::new("length(name)");
        let tokens = lexer.tokenize().unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Identifier("length".to_string()),
                Token::LeftParen,
                Token::Identifier("name".to_string()),
                Token::RightParen,
                Token::Eof
            ]
        );
    }

    #[test]
    fn test_whitespace_ignored() {
        let mut lexer = Lexer::new("  42  +  3  ");
        let tokens = lexer.tokenize().unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Integer(42),
                Token::Plus,
                Token::Integer(3),
                Token::Eof
            ]
        );
    }

    #[test]
    fn test_temporal_keywords() {
        let mut lexer = Lexer::new("NOW TODAY TOMORROW");
        let tokens = lexer.tokenize().unwrap();
        assert_eq!(
            tokens,
            vec![Token::Now, Token::Today, Token::Tomorrow, Token::Eof]
        );
    }

    #[test]
    fn test_negative_number_lexing() {
        // The lexer emits Minus + Integer; unary negation is the parser's job.
        let mut lexer = Lexer::new("-42");
        let tokens = lexer.tokenize().unwrap();
        assert_eq!(tokens, vec![Token::Minus, Token::Integer(42), Token::Eof]);
    }

    #[test]
    fn test_minus_operator() {
        let mut lexer = Lexer::new("10 - 5");
        let tokens = lexer.tokenize().unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Integer(10),
                Token::Minus,
                Token::Integer(5),
                Token::Eof
            ]
        );
    }

    #[test]
    fn test_dot_operator() {
        let mut lexer = Lexer::new("user.age");
        let tokens = lexer.tokenize().unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Identifier("user".to_string()),
                Token::Dot,
                Token::Identifier("age".to_string()),
                Token::Eof
            ]
        );
    }

    #[test]
    fn test_string_escape_sequences() {
        // The source text contains backslash-n; the lexed value is a real newline.
        let mut lexer = Lexer::new("'hello\\nworld'");
        let token = lexer.next_token().unwrap();
        assert_eq!(token, Token::String("hello\nworld".to_string()));
    }

    #[test]
    fn test_lex_error_unterminated_string() {
        let mut lexer = Lexer::new("'hello");
        let result = lexer.next_token();
        assert!(result.is_err());
    }

    #[test]
    fn test_lex_error_invalid_char() {
        let mut lexer = Lexer::new("@#$");
        let result = lexer.next_token();
        assert!(result.is_err());
    }

    #[test]
    fn test_complex_expression() {
        let mut lexer = Lexer::new("let x = 42 in x + 1");
        let tokens = lexer.tokenize().unwrap();
        // 8 source tokens plus the trailing Eof.
        assert_eq!(tokens.len(), 9);
        assert_eq!(tokens[0], Token::Let);
        assert_eq!(tokens[1], Token::Identifier("x".to_string()));
        assert_eq!(tokens[2], Token::Equal);
        assert_eq!(tokens[3], Token::Integer(42));
        assert_eq!(tokens[4], Token::In);
    }

    #[test]
    fn test_arrow_operator() {
        let mut lexer = Lexer::new("=>");
        let token = lexer.next_token().unwrap();
        assert_eq!(token, Token::Arrow);
    }

    #[test]
    fn test_null_literal() {
        let mut lexer = Lexer::new("null");
        let token = lexer.next_token().unwrap();
        assert_eq!(token, Token::Null);
    }
}