1use super::token::{Token, TokenKind};
2use crate::error::{LustError, Result};
3use alloc::{
4 format,
5 string::{String, ToString},
6 vec::Vec,
7};
/// Character-level scanner that turns source text into a list of tokens.
///
/// The source is stored as a `Vec<char>` so it can be indexed by character
/// rather than by byte. `line` and `column` track the position that is
/// attached to tokens and lexer errors.
#[derive(Debug, Clone)]
pub struct Lexer {
    /// Source text, one element per `char`.
    input: Vec<char>,
    /// Index into `input` of the character that will be read next.
    position: usize,
    /// Line of the character at `position` (the constructor starts it at 1).
    line: usize,
    /// Column of the character at `position` (the constructor starts it at 1).
    column: usize,
}
14
15impl Lexer {
16 pub fn new(input: &str) -> Self {
17 Self {
18 input: input.chars().collect(),
19 position: 0,
20 line: 1,
21 column: 1,
22 }
23 }
24
25 pub fn tokenize(&mut self) -> Result<Vec<Token>> {
26 let mut tokens = Vec::new();
27 while !self.is_at_end() {
28 self.skip_whitespace_and_comments()?;
29 if self.is_at_end() {
30 break;
31 }
32
33 let token = self.next_token()?;
34 tokens.push(token);
35 }
36
37 tokens.push(Token::new(
38 TokenKind::Eof,
39 String::new(),
40 self.line,
41 self.column,
42 ));
43 Ok(tokens)
44 }
45
46 fn next_token(&mut self) -> Result<Token> {
47 let start_line = self.line;
48 let start_column = self.column;
49 let ch = self.current_char();
50 let (kind, lexeme) = match ch {
51 '(' => {
52 self.advance();
53 (TokenKind::LeftParen, "(".to_string())
54 }
55
56 ')' => {
57 self.advance();
58 (TokenKind::RightParen, ")".to_string())
59 }
60
61 '{' => {
62 self.advance();
63 (TokenKind::LeftBrace, "{".to_string())
64 }
65
66 '}' => {
67 self.advance();
68 (TokenKind::RightBrace, "}".to_string())
69 }
70
71 '[' => {
72 self.advance();
73 (TokenKind::LeftBracket, "[".to_string())
74 }
75
76 ']' => {
77 self.advance();
78 (TokenKind::RightBracket, "]".to_string())
79 }
80
81 ',' => {
82 self.advance();
83 (TokenKind::Comma, ",".to_string())
84 }
85
86 ';' => {
87 self.advance();
88 (TokenKind::Semicolon, ";".to_string())
89 }
90
91 '%' => {
92 self.advance();
93 (TokenKind::Percent, "%".to_string())
94 }
95
96 '^' => {
97 self.advance();
98 (TokenKind::Caret, "^".to_string())
99 }
100
101 '?' => {
102 self.advance();
103 (TokenKind::Question, "?".to_string())
104 }
105
106 '&' => {
107 self.advance();
108 (TokenKind::Ampersand, "&".to_string())
109 }
110
111 '|' => {
112 self.advance();
113 (TokenKind::Pipe, "|".to_string())
114 }
115
116 '+' => {
117 self.advance();
118 if self.current_char() == '=' {
119 self.advance();
120 (TokenKind::PlusEqual, "+=".to_string())
121 } else {
122 (TokenKind::Plus, "+".to_string())
123 }
124 }
125
126 '-' => {
127 self.advance();
128 if self.current_char() == '=' {
129 self.advance();
130 (TokenKind::MinusEqual, "-=".to_string())
131 } else if self.current_char() == '>' {
132 self.advance();
133 (TokenKind::Arrow, "->".to_string())
134 } else {
135 (TokenKind::Minus, "-".to_string())
136 }
137 }
138
139 '*' => {
140 self.advance();
141 if self.current_char() == '=' {
142 self.advance();
143 (TokenKind::StarEqual, "*=".to_string())
144 } else {
145 (TokenKind::Star, "*".to_string())
146 }
147 }
148
149 '/' => {
150 self.advance();
151 if self.current_char() == '=' {
152 self.advance();
153 (TokenKind::SlashEqual, "/=".to_string())
154 } else {
155 (TokenKind::Slash, "/".to_string())
156 }
157 }
158
159 '=' => {
160 self.advance();
161 if self.current_char() == '=' {
162 self.advance();
163 (TokenKind::DoubleEqual, "==".to_string())
164 } else if self.current_char() == '>' {
165 self.advance();
166 (TokenKind::FatArrow, "=>".to_string())
167 } else {
168 (TokenKind::Equal, "=".to_string())
169 }
170 }
171
172 '~' => {
173 self.advance();
174 if self.current_char() == '=' {
175 self.advance();
176 (TokenKind::NotEqual, "~=".to_string())
177 } else {
178 return Err(LustError::LexerError {
179 line: start_line,
180 column: start_column,
181 message: format!("Unexpected character: {}", ch),
182 module: None,
183 });
184 }
185 }
186
187 '!' => {
188 self.advance();
189 if self.current_char() == '=' {
190 self.advance();
191 (TokenKind::NotEqual, "!=".to_string())
192 } else {
193 return Err(LustError::LexerError {
194 line: start_line,
195 column: start_column,
196 message: format!("Unexpected character: {}", ch),
197 module: None,
198 });
199 }
200 }
201
202 '<' => {
203 self.advance();
204 if self.current_char() == '=' {
205 self.advance();
206 (TokenKind::LessEqual, "<=".to_string())
207 } else {
208 (TokenKind::Less, "<".to_string())
209 }
210 }
211
212 '>' => {
213 self.advance();
214 if self.current_char() == '=' {
215 self.advance();
216 (TokenKind::GreaterEqual, ">=".to_string())
217 } else {
218 (TokenKind::Greater, ">".to_string())
219 }
220 }
221
222 ':' => {
223 self.advance();
224 if self.current_char() == ':' {
225 self.advance();
226 (TokenKind::DoubleColon, "::".to_string())
227 } else {
228 (TokenKind::Colon, ":".to_string())
229 }
230 }
231
232 '.' => {
233 self.advance();
234 if self.current_char() == '.' {
235 self.advance();
236 (TokenKind::DoubleDot, "..".to_string())
237 } else {
238 (TokenKind::Dot, ".".to_string())
239 }
240 }
241
242 '"' | '\'' => self.scan_string()?,
243 '0'..='9' => self.scan_number()?,
244 'a'..='z' | 'A'..='Z' | '_' => self.scan_identifier()?,
245 _ => {
246 return Err(LustError::LexerError {
247 line: start_line,
248 column: start_column,
249 message: format!("Unexpected character: {}", ch),
250 module: None,
251 });
252 }
253 };
254 Ok(Token::new(kind, lexeme, start_line, start_column))
255 }
256
257 fn scan_string(&mut self) -> Result<(TokenKind, String)> {
258 let quote = self.current_char();
259 let start_line = self.line;
260 let start_column = self.column;
261 self.advance();
262 let mut value = String::new();
263 value.push(quote);
264 while !self.is_at_end() && self.current_char() != quote {
265 if self.current_char() == '\\' {
266 value.push(self.current_char());
267 self.advance();
268 if !self.is_at_end() {
269 value.push(self.current_char());
270 self.advance();
271 }
272 } else {
273 value.push(self.current_char());
274 self.advance();
275 }
276 }
277
278 if self.is_at_end() {
279 return Err(LustError::LexerError {
280 line: start_line,
281 column: start_column,
282 message: "Unterminated string".to_string(),
283 module: None,
284 });
285 }
286
287 value.push(self.current_char());
288 self.advance();
289 Ok((TokenKind::String, value))
290 }
291
292 fn scan_number(&mut self) -> Result<(TokenKind, String)> {
293 let mut value = String::new();
294 let mut is_float = false;
295 while !self.is_at_end() && self.current_char().is_ascii_digit() {
296 value.push(self.current_char());
297 self.advance();
298 }
299
300 if !self.is_at_end() && self.current_char() == '.' {
301 if self.peek(1) != Some('.') && self.peek(1).map_or(false, |c| c.is_ascii_digit()) {
302 is_float = true;
303 value.push(self.current_char());
304 self.advance();
305 while !self.is_at_end() && self.current_char().is_ascii_digit() {
306 value.push(self.current_char());
307 self.advance();
308 }
309 }
310 }
311
312 if !self.is_at_end() && (self.current_char() == 'e' || self.current_char() == 'E') {
313 is_float = true;
314 value.push(self.current_char());
315 self.advance();
316 if !self.is_at_end() && (self.current_char() == '+' || self.current_char() == '-') {
317 value.push(self.current_char());
318 self.advance();
319 }
320
321 while !self.is_at_end() && self.current_char().is_ascii_digit() {
322 value.push(self.current_char());
323 self.advance();
324 }
325 }
326
327 let kind = if is_float {
328 TokenKind::Float
329 } else {
330 TokenKind::Integer
331 };
332 Ok((kind, value))
333 }
334
335 fn scan_identifier(&mut self) -> Result<(TokenKind, String)> {
336 let mut value = String::new();
337 while !self.is_at_end()
338 && (self.current_char().is_alphanumeric() || self.current_char() == '_')
339 {
340 value.push(self.current_char());
341 self.advance();
342 }
343
344 let kind = TokenKind::keyword(&value).unwrap_or(TokenKind::Identifier);
345 Ok((kind, value))
346 }
347
348 fn skip_whitespace_and_comments(&mut self) -> Result<()> {
349 while !self.is_at_end() {
350 match self.current_char() {
351 ' ' | '\t' | '\r' => {
352 self.advance();
353 }
354
355 '\n' => {
356 self.advance();
357 self.line += 1;
358 self.column = 1;
359 }
360
361 '-' => {
362 if self.peek(1) == Some('-') {
363 if self.peek(2) == Some('[') && self.peek(3) == Some('[') {
364 self.advance();
365 self.advance();
366 self.advance();
367 self.advance();
368 self.skip_block_comment()?;
369 continue;
370 }
371
372 self.advance();
373 self.advance();
374 while !self.is_at_end() && self.current_char() != '\n' {
375 self.advance();
376 }
377 } else {
378 break;
379 }
380 }
381
382 '#' => {
383 self.advance();
384 while !self.is_at_end() && self.current_char() != '\n' {
385 self.advance();
386 }
387 }
388
389 _ => break,
390 }
391 }
392
393 Ok(())
394 }
395
396 fn skip_block_comment(&mut self) -> Result<()> {
397 while !self.is_at_end() {
398 if self.current_char() == ']' && self.peek(1) == Some(']') {
399 self.advance();
400 self.advance();
401 return Ok(());
402 }
403
404 if self.current_char() == '\n' {
405 self.advance();
406 self.line += 1;
407 self.column = 1;
408 } else {
409 self.advance();
410 }
411 }
412
413 Err(LustError::LexerError {
414 line: self.line,
415 column: self.column,
416 message: "Unterminated block comment".to_string(),
417 module: None,
418 })
419 }
420
421 fn current_char(&self) -> char {
422 if self.is_at_end() {
423 '\0'
424 } else {
425 self.input[self.position]
426 }
427 }
428
429 fn peek(&self, offset: usize) -> Option<char> {
430 let pos = self.position + offset;
431 if pos < self.input.len() {
432 Some(self.input[pos])
433 } else {
434 None
435 }
436 }
437
438 fn advance(&mut self) {
439 if !self.is_at_end() {
440 self.position += 1;
441 self.column += 1;
442 }
443 }
444
445 fn is_at_end(&self) -> bool {
446 self.position >= self.input.len()
447 }
448}