Skip to main content

rustbasic_core/template/
lexer.rs

/// A lexical token of the template language.
///
/// Tokens are either raw template text or one of the pieces found inside a
/// `{{ … }}` variable tag or a `{% … %}` block tag.
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
    /// Raw template text found outside of any tag.
    Text(String),
    /// Opening delimiter of a variable tag: `{{`.
    VariableStart,
    /// Closing delimiter of a variable tag: `}}`.
    VariableEnd,
    /// Opening delimiter of a block tag: `{%`.
    BlockStart,
    /// Closing delimiter of a block tag: `%}`.
    BlockEnd,
    /// A name or keyword, e.g. `if`, `for`, `in`, `endif`, `endfor`.
    Identifier(String),
    /// The contents of a quoted string literal, without the surrounding quotes.
    StringLiteral(String),
    /// A numeric literal, stored as `f64`.
    NumberLiteral(f64),
    /// An operator such as `=`, `==`, `!=`, `<`, `>`, `<=`, `>=` or `|`.
    Operator(String),
    /// The `.` separator.
    Dot,
    /// The `,` separator.
    Comma,
}
15
/// Character-level tokenizer for template source text.
///
/// The lexer has two modes, tracked by `in_expression`: outside a tag it
/// collects raw text, and inside a `{{ … }}` / `{% … %}` tag it produces
/// identifier, literal and operator tokens.
pub struct Lexer<'a> {
    /// The input decoded into `char`s so it can be indexed by position.
    chars: Vec<char>,
    /// Index into `chars` of the next unread character.
    pos: usize,
    /// `true` while the lexer is inside a variable or block tag.
    in_expression: bool,
    /// The original input slice. It is stored but never read (hence the
    /// underscore); it is what ties the struct to the lifetime `'a`.
    _input: &'a str,
}
22
23impl<'a> Lexer<'a> {
24    pub fn new(input: &'a str) -> Self {
25        Self {
26            chars: input.chars().collect(),
27            pos: 0,
28            in_expression: false,
29            _input: input,
30        }
31    }
32
33    fn peek_next(&self) -> Option<char> {
34        if self.pos + 1 < self.chars.len() {
35            Some(self.chars[self.pos + 1])
36        } else {
37            None
38        }
39    }
40
41    fn advance(&mut self) -> Option<char> {
42        if self.pos < self.chars.len() {
43            let ch = self.chars[self.pos];
44            self.pos += 1;
45            Some(ch)
46        } else {
47            None
48        }
49    }
50
51    pub fn tokenize(&mut self) -> Result<Vec<Token>, String> {
52        let mut tokens = Vec::new();
53
54        while self.pos < self.chars.len() {
55            if !self.in_expression {
56                let mut text = String::new();
57                let mut found_delim = false;
58
59                while self.pos < self.chars.len() {
60                    let ch = self.chars[self.pos];
61                    let next = self.peek_next();
62
63                    if ch == '{' && next == Some('{') {
64                        found_delim = true;
65                        if !text.is_empty() {
66                            tokens.push(Token::Text(std::mem::take(&mut text)));
67                        }
68                        tokens.push(Token::VariableStart);
69                        self.advance(); // consume '{'
70                        self.advance(); // consume '{'
71                        self.in_expression = true;
72                        break;
73                    } else if ch == '{' && next == Some('%') {
74                        found_delim = true;
75                        if !text.is_empty() {
76                            tokens.push(Token::Text(std::mem::take(&mut text)));
77                        }
78                        tokens.push(Token::BlockStart);
79                        self.advance(); // consume '{'
80                        self.advance(); // consume '%'
81                        self.in_expression = true;
82                        break;
83                    } else {
84                        text.push(ch);
85                        self.pos += 1;
86                    }
87                }
88
89                if !found_delim && !text.is_empty() {
90                    tokens.push(Token::Text(text));
91                }
92            } else {
93                self.skip_whitespace();
94                if self.pos >= self.chars.len() {
95                    break;
96                }
97
98                let ch = self.chars[self.pos];
99                let next = self.peek_next();
100
101                if ch == '}' && next == Some('}') {
102                    tokens.push(Token::VariableEnd);
103                    self.advance(); // consume '}'
104                    self.advance(); // consume '}'
105                    self.in_expression = false;
106                } else if ch == '%' && next == Some('}') {
107                    tokens.push(Token::BlockEnd);
108                    self.advance(); // consume '%'
109                    self.advance(); // consume '}'
110                    self.in_expression = false;
111                } else if ch == '\'' || ch == '"' {
112                    let quote = ch;
113                    self.advance(); // consume quote
114                    let mut literal = String::new();
115                    let mut escaped = false;
116                    while self.pos < self.chars.len() {
117                        let current_ch = self.chars[self.pos];
118                        if escaped {
119                            literal.push(current_ch);
120                            escaped = false;
121                            self.pos += 1;
122                        } else if current_ch == '\\' {
123                            escaped = true;
124                            self.pos += 1;
125                        } else if current_ch == quote {
126                            self.pos += 1; // consume quote
127                            break;
128                        } else {
129                            literal.push(current_ch);
130                            self.pos += 1;
131                        }
132                    }
133                    tokens.push(Token::StringLiteral(literal));
134                } else if ch.is_ascii_digit() {
135                    let mut num_str = String::new();
136                    while self.pos < self.chars.len() {
137                        let current_ch = self.chars[self.pos];
138                        if current_ch.is_ascii_digit() || current_ch == '.' {
139                            num_str.push(current_ch);
140                            self.pos += 1;
141                        } else {
142                            break;
143                        }
144                    }
145                    let val: f64 = num_str.parse().map_err(|e| format!("Invalid number: {}", e))?;
146                    tokens.push(Token::NumberLiteral(val));
147                } else if ch.is_alphabetic() || ch == '_' {
148                    let mut ident = String::new();
149                    while self.pos < self.chars.len() {
150                        let current_ch = self.chars[self.pos];
151                        if current_ch.is_alphanumeric() || current_ch == '_' {
152                            ident.push(current_ch);
153                            self.pos += 1;
154                        } else {
155                            break;
156                        }
157                    }
158                    tokens.push(Token::Identifier(ident));
159                } else if ch == '.' {
160                    tokens.push(Token::Dot);
161                    self.pos += 1;
162                } else if ch == ',' {
163                    tokens.push(Token::Comma);
164                    self.pos += 1;
165                } else if ch == '=' && next == Some('=') {
166                    tokens.push(Token::Operator("==".to_string()));
167                    self.advance();
168                    self.advance();
169                } else if ch == '!' && next == Some('=') {
170                    tokens.push(Token::Operator("!=".to_string()));
171                    self.advance();
172                    self.advance();
173                } else if ch == '<' && next == Some('=') {
174                    tokens.push(Token::Operator("<=".to_string()));
175                    self.advance();
176                    self.advance();
177                } else if ch == '>' && next == Some('=') {
178                    tokens.push(Token::Operator(">=".to_string()));
179                    self.advance();
180                    self.advance();
181                } else if ch == '<' {
182                    tokens.push(Token::Operator("<".to_string()));
183                    self.pos += 1;
184                } else if ch == '>' {
185                    tokens.push(Token::Operator(">".to_string()));
186                    self.pos += 1;
187                } else if ch == '|' {
188                    tokens.push(Token::Operator("|".to_string()));
189                    self.pos += 1;
190                } else {
191                    tokens.push(Token::Operator(ch.to_string()));
192                    self.pos += 1;
193                }
194            }
195        }
196
197        Ok(tokens)
198    }
199
200    fn skip_whitespace(&mut self) {
201        while self.pos < self.chars.len() && self.chars[self.pos].is_whitespace() {
202            self.pos += 1;
203        }
204    }
205}