1pub mod iter;
2pub mod position;
3pub mod token;
4
5use crate::lexer::position::Position;
6use crate::lexer::token::Token;
7use std::ops::Add;
8
9#[derive(Debug, Clone, PartialEq)]
11enum State {
12 Initial,
14 Identifier,
16 Number,
18 Decimal,
20 String,
22 FormatString,
24 Comment,
26 MultilineComment,
28 Done(TokenType),
30}
/// Coarse category of a buffered lexeme, used to pick the conversion applied
/// when the lexeme is turned into a `Token`.
#[derive(Debug, Clone, Copy, PartialEq)]
enum TokenType {
    /// Identifier-like text: plain identifiers, keywords, `true` and `false`.
    Identifier,
    /// Integer literal.
    Integer,
    /// Floating point literal.
    Float,
    /// Operator or punctuation.
    Symbol,
}
43
44#[derive(Debug, Clone)]
45pub struct Lexer {
46 file_name: String,
47 chars: Vec<char>,
48 index: usize,
49 col: usize,
50 row: usize,
51 keywords: Vec<&'static str>,
52 state: State,
54 buffer: String,
56 start_pos: usize,
58 start_col: usize,
59 start_row: usize,
60}
61
62impl Lexer {
71 pub fn is_eof(&self) -> bool {
72 self.index >= self.chars.len()
73 }
74 pub fn new(file_name: impl Into<String>) -> Self {
75 Self {
76 chars: vec![],
77 index: 0,
78 col: 1,
79 row: 1,
80 file_name: file_name.into(),
81 keywords: vec![
82 "let", "fn", "fun", "return", "if", "while", "import", "else", "val", "var",
83 "break", "continue", "for", "in", "class", "static", "trait", "struct", "extern",
84 "module", "const", "enum", "match",
85 ],
86 state: State::Initial,
87 buffer: String::new(),
88 start_pos: 0,
89 start_col: 1,
90 start_row: 1,
91 }
92 }
93 #[allow(unused)]
94 pub fn get_file_name(&self) -> String {
95 self.file_name.clone()
96 }
97 #[allow(unused)]
98 pub fn line(&self, line: usize) -> String {
99 let mut s = String::new();
100 let mut current_line = 1;
101 for i in &self.chars {
102 if i == &'\n' {
103 current_line += 1;
104 continue;
105 }
106 if current_line == line {
107 s.push(*i);
108 }
109 if current_line > line {
110 break;
111 }
112 }
113 s
114 }
115 #[allow(unused)]
116 pub fn set_chars(&mut self, chars: Vec<char>) {
117 self.chars = chars;
118 }
119
120 fn start_token(&mut self) {
122 self.buffer.clear();
123 self.start_pos = self.index;
124 self.start_col = self.col;
125 self.start_row = self.row;
126 self.state = State::Initial;
127 }
128
129 fn create_position(&self) -> Position {
131 Position::new(
132 self.start_pos,
133 self.buffer.len(),
134 self.start_row,
135 self.start_col,
136 &self.file_name,
137 )
138 }
139
140 pub fn next_token(&mut self) -> Option<(Token, Position)> {
141 self.start_token();
142
143 while self.index < self.chars.len() {
144 let c = self.current_char().unwrap();
145 let peek = self.peek_char().unwrap_or('\0');
146
147 match &self.state {
148 State::Initial => self.process_initial_state(c, peek),
149 State::Identifier => self.process_identifier_state(c),
150 State::Number => self.process_number_state(c, peek),
151 State::Decimal => self.process_decimal_state(c),
152 State::String => {
153 if let Some(token) = self.process_string_state(c) {
154 return Some(token);
155 }
156 }
157 State::FormatString => {
158 if let Some(token) = self.process_format_string_state(c) {
159 return Some(token);
160 }
161 }
162 State::Comment => {
163 if c == '\n' {
164 self.state = State::Initial;
165 }
166 self.increase_index();
167 }
168 State::MultilineComment => {
169 if c == '*' && peek == '/' {
170 self.increase_index();
171 self.increase_index();
172 self.state = State::Initial;
173 } else {
174 self.increase_index();
175 }
176 }
177 State::Done(token_type) => {
178 return self.finalize_token(*token_type);
179 }
180 }
181
182 if let State::Done(token_type) = &self.state {
184 return self.finalize_token(*token_type);
185 }
186 }
187
188 if !self.buffer.is_empty() {
190 if let State::Identifier = self.state {
191 return self.finalize_token(TokenType::Identifier);
192 } else if let State::Number = self.state {
193 return self.finalize_token(TokenType::Integer);
194 } else if let State::Decimal = self.state {
195 return self.finalize_token(TokenType::Float);
196 } else {
197 return self.finalize_token(TokenType::Identifier);
199 }
200 }
201
202 None
203 }
204
205 fn process_initial_state(&mut self, c: char, peek: char) {
207 match (c, peek) {
208 ('r', '"') => {
209 self.buffer.push(c);
210 self.buffer.push(peek);
211 self.increase_index();
212 self.increase_index();
213 self.state = State::FormatString;
214 }
215 ('.', p) if !p.is_numeric() => {
216 self.buffer.push(c);
217 self.increase_index();
218 self.state = State::Done(TokenType::Symbol);
219 }
220 ('-','0'..='9') => {
221 self.buffer.push(c);
222 self.increase_index();
223 self.state = State::Number;
224 }
225 ('0'..='9', _) => {
226 self.buffer.push(c);
227 self.increase_index();
228 self.state = State::Number;
229 }
230 ('.', p) if p.is_numeric() => {
231 self.buffer.push(c);
232 self.increase_index();
233 self.state = State::Decimal;
234 }
235 ('a'..='z' | 'A'..='Z' | '_', _) => {
236 self.buffer.push(c);
237 self.increase_index();
238 self.state = State::Identifier;
239 }
240 ('"', _) => {
241 self.buffer.push(c);
242 self.increase_index();
243 self.state = State::String;
244 }
245 ('\'', _) => {
246 self.buffer.push(c);
247 self.increase_index();
248 self.state = State::String;
249 }
250 ('/', '/') => {
251 self.increase_index();
252 self.increase_index();
253 self.state = State::Comment;
254 }
255 ('/', '*') => {
256 self.increase_index();
257 self.increase_index();
258 self.state = State::MultilineComment;
259 }
260 (':', ':') => {
261 self.buffer.push(':');
262 self.buffer.push(':');
263 self.increase_index();
264 self.increase_index();
265 self.state = State::Done(TokenType::Symbol);
266 }
267 ('!', '=') => {
268 self.buffer.push('!');
269 self.buffer.push('=');
270 self.increase_index();
271 self.increase_index();
272 self.state = State::Done(TokenType::Symbol);
273 }
274 ('=', '=') => {
275 self.buffer.push('=');
276 self.buffer.push('=');
277 self.increase_index();
278 self.increase_index();
279 self.state = State::Done(TokenType::Symbol);
280 }
281 ('|', '|') => {
282 self.buffer.push('|');
283 self.buffer.push('|');
284 self.increase_index();
285 self.increase_index();
286 self.state = State::Done(TokenType::Symbol);
287 }
288 ('&', '&') => {
289 self.buffer.push('&');
290 self.buffer.push('&');
291 self.increase_index();
292 self.increase_index();
293 self.state = State::Done(TokenType::Symbol);
294 }
295 ('-', '>') => {
296 self.buffer.push('-');
297 self.buffer.push('>');
298 self.increase_index();
299 self.increase_index();
300 self.state = State::Done(TokenType::Symbol);
301 }
302 ('>', '=') => {
303 self.buffer.push('>');
304 self.buffer.push('=');
305 self.increase_index();
306 self.increase_index();
307 self.state = State::Done(TokenType::Symbol);
308 }
309 ('<', '=') => {
310 self.buffer.push('<');
311 self.buffer.push('=');
312 self.increase_index();
313 self.increase_index();
314 self.state = State::Done(TokenType::Symbol);
315 }
316 ('(', _)
317 | (')', _)
318 | ('{', _)
319 | ('}', _)
320 | ('[', _)
321 | (']', _)
322 | (':', _)
323 | (',', _)
324 | ('|', _)
325 | ('!', _)
326 | ('@', _)
327 | ('=', _)
328 | ('&', _)
329 | ('>', _)
330 | ('<', _)
331 | ('+', _)
332 | ('-', _)
333 | ('*', _)
334 | ('%', _)
335 | ('/', _) => {
336 self.buffer.push(c);
337 self.increase_index();
338 self.state = State::Done(TokenType::Symbol);
339 }
340 (' ' | '\r' | ';' | '\t' | '\n', _) => {
341 self.increase_index();
342 }
343 _ => {
344 println!("Unexpected character: {:?}", c);
345 self.increase_index();
346 }
347 }
348 }
349
350 fn process_identifier_state(&mut self, c: char) {
352 if c.is_alphanumeric() || c == '_' {
353 self.buffer.push(c);
354 self.increase_index();
355 } else {
356 self.state = State::Done(TokenType::Identifier);
357 }
358 }
359
360 fn process_number_state(&mut self, c: char, peek: char) {
362 if c.is_numeric() {
363 self.buffer.push(c);
364 self.increase_index();
365 } else if c == '.' && peek.is_numeric() {
366 self.buffer.push(c);
367 self.increase_index();
368 self.state = State::Decimal;
369 } else {
370 self.state = State::Done(TokenType::Integer);
371 }
372 }
373
374 fn process_decimal_state(&mut self, c: char) {
376 if c.is_numeric() {
377 self.buffer.push(c);
378 self.increase_index();
379 } else {
380 self.state = State::Done(TokenType::Float);
381 }
382 }
383
384 fn process_string_state(&mut self, c: char) -> Option<(Token, Position)> {
386 let quote = self.buffer.chars().next().unwrap();
387 if c == quote {
388 self.buffer.push(c);
389 self.increase_index();
390 let pos = self.create_position();
391 let content = self.buffer[1..self.buffer.len() - 1]
392 .to_string()
393 .replace("\\n", "\n");
394 Some((Token::String(content), pos))
395 } else {
396 self.buffer.push(c);
397 self.increase_index();
398 None
399 }
400 }
401
402 fn process_format_string_state(&mut self, c: char) -> Option<(Token, Position)> {
404 if c == '"' {
405 self.buffer.push(c);
406 self.increase_index();
407 let pos = self.create_position();
408 let content = self.buffer[2..self.buffer.len() - 1]
409 .to_string()
410 .replace("\\n", "\n");
411 Some((Token::FormatString(content), pos))
412 } else {
413 self.buffer.push(c);
414 self.increase_index();
415 None
416 }
417 }
418
419 fn finalize_token(&mut self, token_type: TokenType) -> Option<(Token, Position)> {
421 let pos = self.create_position();
422
423 let token = match token_type {
425 TokenType::Identifier => {
426 let ident = self.buffer.clone();
427 if self.keywords.contains(&ident.as_str()) {
428 Token::Keyword(ident)
429 } else if ident == "true" {
430 Token::Boolean(true)
431 } else if ident == "false" {
432 Token::Boolean(false)
433 } else {
434 Token::Identifier(ident)
435 }
436 }
437 TokenType::Integer => {
438 let i: i64 = self.buffer.parse().unwrap();
439 Token::Int(i)
440 }
441 TokenType::Float => {
442 let f: f64 = self.buffer.parse().unwrap();
443 Token::Float(f)
444 }
445 TokenType::Symbol => match self.buffer.as_str() {
446 "(" => Token::BraceLeft,
447 ")" => Token::BraceRight,
448 "{" => Token::ParenLeft,
449 "}" => Token::ParenRight,
450 "[" => Token::BracketLeft,
451 "]" => Token::BracketRight,
452 "." => Token::Dot,
453 ":" => Token::Colon,
454 "::" => Token::ScopeSymbol,
455 "=" => Token::Assign,
456 "," => Token::Comma,
457 "+" => Token::Plus,
458 "-" => Token::Minus,
459 "*" => Token::Star,
460 "/" => Token::Slash,
461 "%" => Token::Mod,
462 ">" => Token::Greater,
463 "<" => Token::Less,
464 "<=" => Token::LessEqual,
465 ">=" => Token::GreaterEqual,
466 "==" => Token::Equal,
467 "!=" => Token::NotEqual,
468 "->" => Token::Arrow,
469 "!" => Token::Not,
470 "&&" => Token::And,
471 "||" => Token::Or,
472 "|" => Token::Vertical,
473 "@" => Token::Annotation,
474 "&" => Token::BitAnd,
475 _ => {
476 println!("Unknown token: {}", self.buffer);
477 return None;
478 }
479 },
480 };
493
494 Some((token, pos))
495 }
496
497 fn peek_char(&self) -> Option<char> {
498 self.chars.get(self.index + 1).copied()
499 }
500
501 fn current_char(&self) -> Option<char> {
502 self.chars.get(self.index).copied()
503 }
504
505 fn increase_index(&mut self) {
506 if self.index < self.chars.len() && self.chars[self.index] == '\n' {
507 self.row += 1;
508 self.col = 0;
509 }
510 self.index = self.index.add(1);
511 self.col += 1;
512 }
513
514 #[allow(unused)]
515 pub fn get_source(&self) -> Vec<char> {
516 self.chars.clone()
517 }
518
519 pub fn from_script(module_name: impl Into<String>, script: impl AsRef<str>) -> Self {
520 let script = script.as_ref().chars().collect();
521 let mut lexer = Lexer::new(module_name);
522 lexer.set_chars(script);
523 lexer
524 }
525}