vtl_rs/
lexer.rs

1use crate::types::*;
2
3pub fn lex(input: &str) -> Result<Vec<Token>, String> {
4    let mut tokens = Vec::new();
5    let mut chars = input.chars().peekable();
6
7    while let Some(&ch) = chars.peek() {
8        match ch {
9            ' ' | '\t' => {
10                chars.next(); // skip whitespace
11            }
12            '?' => {
13                tokens.push(Token::Variable('?'));
14                chars.next();
15            }
16            '%' => {
17                tokens.push(Token::Remainder); // '%' is used for remainder/modulus
18                chars.next();
19            }
20            '$' | '#' | '!' | '&' => {
21                tokens.push(Token::Variable(ch)); // system variables
22                chars.next();
23            }
24            ':' => {
25                tokens.push(Token::Colon); // for array subscripting
26                chars.next();
27            }
28            ')' => {
29                tokens.push(Token::RightParen); // close subscript expression
30                chars.next();
31            }
32            '>' => {
33                tokens.push(Token::GreaterThan);
34                chars.next();
35            }
36            '<' => {
37                tokens.push(Token::LessThan);
38                chars.next();
39            }
40            '=' => {
41                tokens.push(Token::Assign);
42                chars.next();
43            }
44            '+' => {
45                tokens.push(Token::Plus);
46                chars.next();
47            }
48            '-' => {
49                tokens.push(Token::Minus);
50                chars.next();
51            }
52            '*' => {
53                tokens.push(Token::Multiply);
54                chars.next();
55            }
56            '/' => {
57                tokens.push(Token::Divide);
58                chars.next();
59            }
60            '0'..='9' => {
61                // Handle numbers
62                let mut num: i64 = 0;
63                while let Some(&digit) = chars.peek() {
64                    if digit.is_numeric() {
65                        // Use checked_mul and checked_add to prevent overflow
66                        num = num
67                            .checked_mul(10)
68                            .and_then(|n| n.checked_add(digit as i64 - '0' as i64))
69                            .ok_or_else(|| "Overflow error".to_string())?;
70                        chars.next();
71                    } else {
72                        break;
73                    }
74                }
75                tokens.push(Token::Number(num));
76            }
77            'A'..='Z' => {
78                // Handle variable names A-Z
79                tokens.push(Token::Variable(ch));
80                chars.next();
81            }
82            '"' => {
83                // Handle string literals
84                chars.next(); // Skip the opening quote
85                let mut string_literal = String::new();
86                let mut found_closing_quote = false;
87
88                while let Some(&next_char) = chars.peek() {
89                    if next_char == '"' {
90                        chars.next(); // Skip the closing quote
91                        tokens.push(Token::StringLiteral(string_literal));
92                        found_closing_quote = true;
93                        break;
94                    }
95                    string_literal.push(next_char);
96                    chars.next();
97                }
98
99                // If we reached the end of input without finding a closing quote
100                if !found_closing_quote {
101                    return Err("Unclosed string literal".to_string());
102                }
103            }
104            _ => {
105                tokens.push(Token::Invalid(ch)); // Invalid tokens
106                chars.next();
107            }
108        }
109    }
110
111    tokens.push(Token::EndOfInput); // Mark the end of input
112    Ok(tokens)
113}