1pub mod iter;
2pub mod position;
3pub mod token;
4
5use crate::lexer::position::Position;
6use crate::lexer::token::Token;
7use std::ops::Add;
8
9#[derive(Debug, Clone, PartialEq)]
11enum State {
12 Initial,
14 Identifier,
16 Number,
18 Decimal,
20 String,
22 FormatString,
24 Comment,
26 MultilineComment,
28 Done(TokenType),
30}
/// Coarse category of a completed token, used to decide how the
/// accumulated buffer text is converted into a concrete token.
#[derive(Clone, Copy, Debug, PartialEq)]
enum TokenType {
    /// Identifier-like text (also covers keywords and boolean literals).
    Identifier,
    /// Integer literal.
    Integer,
    /// Floating-point literal.
    Float,
    /// Operator or punctuation.
    Symbol,
}
43
44#[derive(Debug, Clone)]
45pub struct Lexer {
46 file_name: String,
47 chars: Vec<char>,
48 index: usize,
49 col: usize,
50 row: usize,
51 keywords: Vec<&'static str>,
52 state: State,
54 buffer: String,
56 start_pos: usize,
58 start_col: usize,
59 start_row: usize,
60}
61
62impl Lexer {
71 pub fn is_eof(&self) -> bool {
72 self.index >= self.chars.len()
73 }
74 pub fn new(file_name: impl Into<String>) -> Self {
75 Self {
76 chars: vec![],
77 index: 0,
78 col: 1,
79 row: 1,
80 file_name: file_name.into(),
81 keywords: vec![
82 "let", "fn", "fun", "return", "if", "while", "import", "else", "val", "var",
83 "break", "continue", "for", "in", "class", "static", "trait", "struct", "extern",
84 "module", "const", "enum", "match",
85 ],
86 state: State::Initial,
87 buffer: String::new(),
88 start_pos: 0,
89 start_col: 1,
90 start_row: 1,
91 }
92 }
93 #[allow(unused)]
94 pub fn get_file_name(&self) -> String {
95 self.file_name.clone()
96 }
97 #[allow(unused)]
98 pub fn line(&self, line: usize) -> String {
99 let mut s = String::new();
100 let mut current_line = 1;
101 for i in &self.chars {
102 if i == &'\n' {
103 current_line += 1;
104 continue;
105 }
106 if current_line == line {
107 s.push(*i);
108 }
109 if current_line > line {
110 break;
111 }
112 }
113 s
114 }
115 #[allow(unused)]
116 pub fn set_chars(&mut self, chars: Vec<char>) {
117 self.chars = chars;
118 }
119
120 fn start_token(&mut self) {
122 self.buffer.clear();
123 self.start_pos = self.index;
124 self.start_col = self.col;
125 self.start_row = self.row;
126 self.state = State::Initial;
127 }
128
129 fn create_position(&self) -> Position {
131 Position::new(
132 self.start_pos,
133 self.buffer.len(),
134 self.start_row,
135 self.start_col,
136 &self.file_name,
137 )
138 }
139
140 pub fn next_token(&mut self) -> Option<(Token, Position)> {
141 self.start_token();
142
143 while self.index < self.chars.len() {
144 let c = self.current_char().unwrap();
145 let peek = self.peek_char().unwrap_or('\0');
146
147 match &self.state {
148 State::Initial => self.process_initial_state(c, peek),
149 State::Identifier => self.process_identifier_state(c),
150 State::Number => self.process_number_state(c, peek),
151 State::Decimal => self.process_decimal_state(c),
152 State::String => {
153 if let Some(token) = self.process_string_state(c) {
154 return Some(token);
155 }
156 }
157 State::FormatString => {
158 if let Some(token) = self.process_format_string_state(c) {
159 return Some(token);
160 }
161 }
162 State::Comment => {
163 if c == '\n' {
164 self.state = State::Initial;
165 }
166 self.increase_index();
167 }
168 State::MultilineComment => {
169 if c == '*' && peek == '/' {
170 self.increase_index();
171 self.increase_index();
172 self.state = State::Initial;
173 } else {
174 self.increase_index();
175 }
176 }
177 State::Done(token_type) => {
178 return self.finalize_token(*token_type);
179 }
180 }
181
182 if let State::Done(token_type) = &self.state {
184 return self.finalize_token(*token_type);
185 }
186 }
187
188 if !self.buffer.is_empty() {
190 if let State::Identifier = self.state {
191 return self.finalize_token(TokenType::Identifier);
192 } else if let State::Number = self.state {
193 return self.finalize_token(TokenType::Integer);
194 } else if let State::Decimal = self.state {
195 return self.finalize_token(TokenType::Float);
196 } else {
197 return self.finalize_token(TokenType::Identifier);
199 }
200 }
201
202 None
203 }
204
205 fn process_initial_state(&mut self, c: char, peek: char) {
207 match (c, peek) {
208 ('r', '"') => {
209 self.buffer.push(c);
210 self.buffer.push(peek);
211 self.increase_index();
212 self.increase_index();
213 self.state = State::FormatString;
214 }
215 ('.', p) if !p.is_numeric() => {
216 self.buffer.push(c);
217 self.increase_index();
218 self.state = State::Done(TokenType::Symbol);
219 }
220 ('0'..='9', _) => {
221 self.buffer.push(c);
222 self.increase_index();
223 self.state = State::Number;
224 }
225 ('.', p) if p.is_numeric() => {
226 self.buffer.push(c);
227 self.increase_index();
228 self.state = State::Decimal;
229 }
230 ('a'..='z' | 'A'..='Z' | '_', _) => {
231 self.buffer.push(c);
232 self.increase_index();
233 self.state = State::Identifier;
234 }
235 ('"', _) => {
236 self.buffer.push(c);
237 self.increase_index();
238 self.state = State::String;
239 }
240 ('\'', _) => {
241 self.buffer.push(c);
242 self.increase_index();
243 self.state = State::String;
244 }
245 ('/', '/') => {
246 self.increase_index();
247 self.increase_index();
248 self.state = State::Comment;
249 }
250 ('/', '*') => {
251 self.increase_index();
252 self.increase_index();
253 self.state = State::MultilineComment;
254 }
255 (':', ':') => {
256 self.buffer.push(':');
257 self.buffer.push(':');
258 self.increase_index();
259 self.increase_index();
260 self.state = State::Done(TokenType::Symbol);
261 }
262 ('!', '=') => {
263 self.buffer.push('!');
264 self.buffer.push('=');
265 self.increase_index();
266 self.increase_index();
267 self.state = State::Done(TokenType::Symbol);
268 }
269 ('=', '=') => {
270 self.buffer.push('=');
271 self.buffer.push('=');
272 self.increase_index();
273 self.increase_index();
274 self.state = State::Done(TokenType::Symbol);
275 }
276 ('&', '&') => {
277 self.buffer.push('&');
278 self.buffer.push('&');
279 self.increase_index();
280 self.increase_index();
281 self.state = State::Done(TokenType::Symbol);
282 }
283 ('-', '>') => {
284 self.buffer.push('-');
285 self.buffer.push('>');
286 self.increase_index();
287 self.increase_index();
288 self.state = State::Done(TokenType::Symbol);
289 }
290 ('>', '=') => {
291 self.buffer.push('>');
292 self.buffer.push('=');
293 self.increase_index();
294 self.increase_index();
295 self.state = State::Done(TokenType::Symbol);
296 }
297 ('<', '=') => {
298 self.buffer.push('<');
299 self.buffer.push('=');
300 self.increase_index();
301 self.increase_index();
302 self.state = State::Done(TokenType::Symbol);
303 }
304 ('(', _)
305 | (')', _)
306 | ('{', _)
307 | ('}', _)
308 | ('[', _)
309 | (']', _)
310 | (':', _)
311 | (',', _)
312 | ('|', _)
313 | ('!', _)
314 | ('@', _)
315 | ('=', _)
316 | ('&', _)
317 | ('>', _)
318 | ('<', _)
319 | ('+', _)
320 | ('-', _)
321 | ('*', _)
322 | ('%', _)
323 | ('/', _) => {
324 self.buffer.push(c);
325 self.increase_index();
326 self.state = State::Done(TokenType::Symbol);
327 }
328 (' ' | '\r' | ';' | '\t' | '\n', _) => {
329 self.increase_index();
330 }
331 _ => {
332 println!("Unexpected character: {:?}", c);
333 self.increase_index();
334 }
335 }
336 }
337
338 fn process_identifier_state(&mut self, c: char) {
340 if c.is_alphanumeric() || c == '_' {
341 self.buffer.push(c);
342 self.increase_index();
343 } else {
344 self.state = State::Done(TokenType::Identifier);
345 }
346 }
347
348 fn process_number_state(&mut self, c: char, peek: char) {
350 if c.is_numeric() {
351 self.buffer.push(c);
352 self.increase_index();
353 } else if c == '.' && peek.is_numeric() {
354 self.buffer.push(c);
355 self.increase_index();
356 self.state = State::Decimal;
357 } else {
358 self.state = State::Done(TokenType::Integer);
359 }
360 }
361
362 fn process_decimal_state(&mut self, c: char) {
364 if c.is_numeric() {
365 self.buffer.push(c);
366 self.increase_index();
367 } else {
368 self.state = State::Done(TokenType::Float);
369 }
370 }
371
372 fn process_string_state(&mut self, c: char) -> Option<(Token, Position)> {
374 let quote = self.buffer.chars().next().unwrap();
375 if c == quote {
376 self.buffer.push(c);
377 self.increase_index();
378 let pos = self.create_position();
379 let content = self.buffer[1..self.buffer.len() - 1]
380 .to_string()
381 .replace("\\n", "\n");
382 Some((Token::String(content), pos))
383 } else {
384 self.buffer.push(c);
385 self.increase_index();
386 None
387 }
388 }
389
390 fn process_format_string_state(&mut self, c: char) -> Option<(Token, Position)> {
392 if c == '"' {
393 self.buffer.push(c);
394 self.increase_index();
395 let pos = self.create_position();
396 let content = self.buffer[2..self.buffer.len() - 1]
397 .to_string()
398 .replace("\\n", "\n");
399 Some((Token::FormatString(content), pos))
400 } else {
401 self.buffer.push(c);
402 self.increase_index();
403 None
404 }
405 }
406
407 fn finalize_token(&mut self, token_type: TokenType) -> Option<(Token, Position)> {
409 let pos = self.create_position();
410
411 let token = match token_type {
413 TokenType::Identifier => {
414 let ident = self.buffer.clone();
415 if self.keywords.contains(&ident.as_str()) {
416 Token::Keyword(ident)
417 } else if ident == "true" {
418 Token::Boolean(true)
419 } else if ident == "false" {
420 Token::Boolean(false)
421 } else {
422 Token::Identifier(ident)
423 }
424 }
425 TokenType::Integer => {
426 let i: i64 = self.buffer.parse().unwrap();
427 Token::Int(i)
428 }
429 TokenType::Float => {
430 let f: f32 = self.buffer.parse().unwrap();
431 Token::Float(f)
432 }
433 TokenType::Symbol => match self.buffer.as_str() {
434 "(" => Token::BraceLeft,
435 ")" => Token::BraceRight,
436 "{" => Token::ParenLeft,
437 "}" => Token::ParenRight,
438 "[" => Token::BracketLeft,
439 "]" => Token::BracketRight,
440 "." => Token::Dot,
441 ":" => Token::Colon,
442 "::" => Token::ScopeSymbol,
443 "=" => Token::Assign,
444 "," => Token::Comma,
445 "+" => Token::Plus,
446 "-" => Token::Minus,
447 "*" => Token::Star,
448 "/" => Token::Slash,
449 "%" => Token::Mod,
450 ">" => Token::Greater,
451 "<" => Token::Less,
452 "<=" => Token::LessEqual,
453 ">=" => Token::GreaterEqual,
454 "==" => Token::Equal,
455 "!=" => Token::NotEqual,
456 "->" => Token::Arrow,
457 "!" => Token::Not,
458 "&&" => Token::And,
459 "|" => Token::Vertical,
460 "@" => Token::Annotation,
461 "&" => Token::BitAnd,
462 _ => {
463 println!("Unknown token: {}", self.buffer);
464 return None;
465 }
466 },
467 };
480
481 Some((token, pos))
482 }
483
484 fn peek_char(&self) -> Option<char> {
485 self.chars.get(self.index + 1).copied()
486 }
487
488 fn current_char(&self) -> Option<char> {
489 self.chars.get(self.index).copied()
490 }
491
492 fn increase_index(&mut self) {
493 if self.index < self.chars.len() && self.chars[self.index] == '\n' {
494 self.row += 1;
495 self.col = 0;
496 }
497 self.index = self.index.add(1);
498 self.col += 1;
499 }
500
501 #[allow(unused)]
502 pub fn get_source(&self) -> Vec<char> {
503 self.chars.clone()
504 }
505
506 pub fn from_script(module_name: impl Into<String>, script: impl AsRef<str>) -> Self {
507 let script = script.as_ref().chars().collect();
508 let mut lexer = Lexer::new(module_name);
509 lexer.set_chars(script);
510 lexer
511 }
512}