1pub mod iter;
2pub mod position;
3pub mod token;
4
5use crate::lexer::position::Position;
6use crate::lexer::token::Token;
7use std::ops::Add;
8
9#[derive(Debug, Clone, PartialEq)]
11enum State {
12 Initial,
14 Identifier,
16 Number,
18 Decimal,
20 String,
22 FormatString,
24 Comment,
26 MultilineComment,
28 Done(TokenType),
30}
/// Coarse category of a finished token, used to decide how the buffered text
/// is converted into a `Token`.
#[derive(Debug, Clone, Copy, PartialEq)]
enum TokenType {
    /// Identifier-shaped text; may turn out to be a keyword or a boolean.
    Identifier,
    /// Digits without a decimal point.
    Integer,
    /// Digits containing a decimal point.
    Float,
    /// Operator or punctuation text.
    Symbol,
}
43
44#[derive(Debug, Clone)]
45pub struct Lexer {
46 file_name: String,
47 chars: Vec<char>,
48 index: usize,
49 col: usize,
50 row: usize,
51 keywords: Vec<&'static str>,
52 state: State,
54 buffer: String,
56 start_pos: usize,
58 start_col: usize,
59 start_row: usize,
60}
61
62impl Lexer {
71 pub fn is_eof(&self) -> bool {
72 self.index >= self.chars.len()
73 }
74 pub fn new(file_name: impl Into<String>) -> Self {
75 Self {
76 chars: vec![],
77 index: 0,
78 col: 1,
79 row: 1,
80 file_name: file_name.into(),
81 keywords: vec![
82 "let", "fn", "fun", "return", "if", "while", "import", "else", "val", "var",
83 "break", "continue", "for", "in", "class", "static", "trait", "struct", "extern",
84 "module", "const", "enum", "match",
85 ],
86 state: State::Initial,
87 buffer: String::new(),
88 start_pos: 0,
89 start_col: 1,
90 start_row: 1,
91 }
92 }
93 #[allow(unused)]
94 pub fn get_file_name(&self) -> String {
95 self.file_name.clone()
96 }
97 #[allow(unused)]
98 pub fn line(&self, line: usize) -> String {
99 let mut s = String::new();
100 let mut current_line = 1;
101 for i in &self.chars {
102 if i == &'\n' {
103 current_line += 1;
104 continue;
105 }
106 if current_line == line {
107 s.push(*i);
108 }
109 if current_line > line {
110 break;
111 }
112 }
113 s
114 }
115 #[allow(unused)]
116 pub fn set_chars(&mut self, chars: Vec<char>) {
117 self.chars = chars;
118 }
119
120 fn start_token(&mut self) {
122 self.buffer.clear();
123 self.start_pos = self.index;
124 self.start_col = self.col;
125 self.start_row = self.row;
126 self.state = State::Initial;
127 }
128
129 fn create_position(&self) -> Position {
131 Position::new(
132 self.start_pos,
133 self.buffer.len(),
134 self.start_row,
135 self.start_col,
136 &self.file_name,
137 )
138 }
139
140 pub fn next_token(&mut self) -> Option<(Token, Position)> {
141 self.start_token();
142
143 while self.index < self.chars.len() {
144 let c = self.current_char().unwrap();
145 let peek = self.peek_char().unwrap_or('\0');
146
147 match &self.state {
148 State::Initial => self.process_initial_state(c, peek),
149 State::Identifier => self.process_identifier_state(c),
150 State::Number => self.process_number_state(c, peek),
151 State::Decimal => self.process_decimal_state(c),
152 State::String => {
153 if let Some(token) = self.process_string_state(c) {
154 return Some(token);
155 }
156 }
157 State::FormatString => {
158 if let Some(token) = self.process_format_string_state(c) {
159 return Some(token);
160 }
161 }
162 State::Comment => {
163 if c == '\n' {
164 self.state = State::Initial;
165 }
166 self.increase_index();
167 }
168 State::MultilineComment => {
169 if c == '*' && peek == '/' {
170 self.increase_index();
171 self.increase_index();
172 self.state = State::Initial;
173 } else {
174 self.increase_index();
175 }
176 }
177 State::Done(token_type) => {
178 return self.finalize_token(*token_type);
179 }
180 }
181
182 if let State::Done(token_type) = &self.state {
184 return self.finalize_token(*token_type);
185 }
186 }
187
188 if !self.buffer.is_empty() {
190 if let State::Identifier = self.state {
191 return self.finalize_token(TokenType::Identifier);
192 } else if let State::Number = self.state {
193 return self.finalize_token(TokenType::Integer);
194 } else if let State::Decimal = self.state {
195 return self.finalize_token(TokenType::Float);
196 } else {
197 return self.finalize_token(TokenType::Identifier);
199 }
200 }
201
202 None
203 }
204
205 fn process_initial_state(&mut self, c: char, peek: char) {
207 match (c, peek) {
208 ('r', '"') => {
209 self.buffer.push(c);
210 self.buffer.push(peek);
211 self.increase_index();
212 self.increase_index();
213 self.state = State::FormatString;
214 }
215 ('.', p) if !p.is_numeric() => {
216 self.buffer.push(c);
217 self.increase_index();
218 self.state = State::Done(TokenType::Symbol);
219 }
220 ('0'..='9', _) => {
221 self.buffer.push(c);
222 self.increase_index();
223 self.state = State::Number;
224 }
225 ('.', p) if p.is_numeric() => {
226 self.buffer.push(c);
227 self.increase_index();
228 self.state = State::Decimal;
229 }
230 ('a'..='z' | 'A'..='Z' | '_', _) => {
231 self.buffer.push(c);
232 self.increase_index();
233 self.state = State::Identifier;
234 }
235 ('"', _) => {
236 self.buffer.push(c);
237 self.increase_index();
238 self.state = State::String;
239 }
240 ('\'', _) => {
241 self.buffer.push(c);
242 self.increase_index();
243 self.state = State::String;
244 }
245 ('/', '/') => {
246 self.increase_index();
247 self.increase_index();
248 self.state = State::Comment;
249 }
250 ('/', '*') => {
251 self.increase_index();
252 self.increase_index();
253 self.state = State::MultilineComment;
254 }
255 (':', ':') => {
256 self.buffer.push(':');
257 self.buffer.push(':');
258 self.increase_index();
259 self.increase_index();
260 self.state = State::Done(TokenType::Symbol);
261 }
262 ('!', '=') => {
263 self.buffer.push('!');
264 self.buffer.push('=');
265 self.increase_index();
266 self.increase_index();
267 self.state = State::Done(TokenType::Symbol);
268 }
269 ('=', '=') => {
270 self.buffer.push('=');
271 self.buffer.push('=');
272 self.increase_index();
273 self.increase_index();
274 self.state = State::Done(TokenType::Symbol);
275 }
276 ('|', '|') => {
277 self.buffer.push('|');
278 self.buffer.push('|');
279 self.increase_index();
280 self.increase_index();
281 self.state = State::Done(TokenType::Symbol);
282 }
283 ('&', '&') => {
284 self.buffer.push('&');
285 self.buffer.push('&');
286 self.increase_index();
287 self.increase_index();
288 self.state = State::Done(TokenType::Symbol);
289 }
290 ('-', '>') => {
291 self.buffer.push('-');
292 self.buffer.push('>');
293 self.increase_index();
294 self.increase_index();
295 self.state = State::Done(TokenType::Symbol);
296 }
297 ('>', '=') => {
298 self.buffer.push('>');
299 self.buffer.push('=');
300 self.increase_index();
301 self.increase_index();
302 self.state = State::Done(TokenType::Symbol);
303 }
304 ('<', '=') => {
305 self.buffer.push('<');
306 self.buffer.push('=');
307 self.increase_index();
308 self.increase_index();
309 self.state = State::Done(TokenType::Symbol);
310 }
311 ('(', _)
312 | (')', _)
313 | ('{', _)
314 | ('}', _)
315 | ('[', _)
316 | (']', _)
317 | (':', _)
318 | (',', _)
319 | ('|', _)
320 | ('!', _)
321 | ('@', _)
322 | ('=', _)
323 | ('&', _)
324 | ('>', _)
325 | ('<', _)
326 | ('+', _)
327 | ('-', _)
328 | ('*', _)
329 | ('%', _)
330 | ('/', _) => {
331 self.buffer.push(c);
332 self.increase_index();
333 self.state = State::Done(TokenType::Symbol);
334 }
335 (' ' | '\r' | ';' | '\t' | '\n', _) => {
336 self.increase_index();
337 }
338 _ => {
339 println!("Unexpected character: {:?}", c);
340 self.increase_index();
341 }
342 }
343 }
344
345 fn process_identifier_state(&mut self, c: char) {
347 if c.is_alphanumeric() || c == '_' {
348 self.buffer.push(c);
349 self.increase_index();
350 } else {
351 self.state = State::Done(TokenType::Identifier);
352 }
353 }
354
355 fn process_number_state(&mut self, c: char, peek: char) {
357 if c.is_numeric() {
358 self.buffer.push(c);
359 self.increase_index();
360 } else if c == '.' && peek.is_numeric() {
361 self.buffer.push(c);
362 self.increase_index();
363 self.state = State::Decimal;
364 } else {
365 self.state = State::Done(TokenType::Integer);
366 }
367 }
368
369 fn process_decimal_state(&mut self, c: char) {
371 if c.is_numeric() {
372 self.buffer.push(c);
373 self.increase_index();
374 } else {
375 self.state = State::Done(TokenType::Float);
376 }
377 }
378
379 fn process_string_state(&mut self, c: char) -> Option<(Token, Position)> {
381 let quote = self.buffer.chars().next().unwrap();
382 if c == quote {
383 self.buffer.push(c);
384 self.increase_index();
385 let pos = self.create_position();
386 let content = self.buffer[1..self.buffer.len() - 1]
387 .to_string()
388 .replace("\\n", "\n");
389 Some((Token::String(content), pos))
390 } else {
391 self.buffer.push(c);
392 self.increase_index();
393 None
394 }
395 }
396
397 fn process_format_string_state(&mut self, c: char) -> Option<(Token, Position)> {
399 if c == '"' {
400 self.buffer.push(c);
401 self.increase_index();
402 let pos = self.create_position();
403 let content = self.buffer[2..self.buffer.len() - 1]
404 .to_string()
405 .replace("\\n", "\n");
406 Some((Token::FormatString(content), pos))
407 } else {
408 self.buffer.push(c);
409 self.increase_index();
410 None
411 }
412 }
413
414 fn finalize_token(&mut self, token_type: TokenType) -> Option<(Token, Position)> {
416 let pos = self.create_position();
417
418 let token = match token_type {
420 TokenType::Identifier => {
421 let ident = self.buffer.clone();
422 if self.keywords.contains(&ident.as_str()) {
423 Token::Keyword(ident)
424 } else if ident == "true" {
425 Token::Boolean(true)
426 } else if ident == "false" {
427 Token::Boolean(false)
428 } else {
429 Token::Identifier(ident)
430 }
431 }
432 TokenType::Integer => {
433 let i: i64 = self.buffer.parse().unwrap();
434 Token::Int(i)
435 }
436 TokenType::Float => {
437 let f: f32 = self.buffer.parse().unwrap();
438 Token::Float(f)
439 }
440 TokenType::Symbol => match self.buffer.as_str() {
441 "(" => Token::BraceLeft,
442 ")" => Token::BraceRight,
443 "{" => Token::ParenLeft,
444 "}" => Token::ParenRight,
445 "[" => Token::BracketLeft,
446 "]" => Token::BracketRight,
447 "." => Token::Dot,
448 ":" => Token::Colon,
449 "::" => Token::ScopeSymbol,
450 "=" => Token::Assign,
451 "," => Token::Comma,
452 "+" => Token::Plus,
453 "-" => Token::Minus,
454 "*" => Token::Star,
455 "/" => Token::Slash,
456 "%" => Token::Mod,
457 ">" => Token::Greater,
458 "<" => Token::Less,
459 "<=" => Token::LessEqual,
460 ">=" => Token::GreaterEqual,
461 "==" => Token::Equal,
462 "!=" => Token::NotEqual,
463 "->" => Token::Arrow,
464 "!" => Token::Not,
465 "&&" => Token::And,
466 "||" => Token::Or,
467 "|" => Token::Vertical,
468 "@" => Token::Annotation,
469 "&" => Token::BitAnd,
470 _ => {
471 println!("Unknown token: {}", self.buffer);
472 return None;
473 }
474 },
475 };
488
489 Some((token, pos))
490 }
491
492 fn peek_char(&self) -> Option<char> {
493 self.chars.get(self.index + 1).copied()
494 }
495
496 fn current_char(&self) -> Option<char> {
497 self.chars.get(self.index).copied()
498 }
499
500 fn increase_index(&mut self) {
501 if self.index < self.chars.len() && self.chars[self.index] == '\n' {
502 self.row += 1;
503 self.col = 0;
504 }
505 self.index = self.index.add(1);
506 self.col += 1;
507 }
508
509 #[allow(unused)]
510 pub fn get_source(&self) -> Vec<char> {
511 self.chars.clone()
512 }
513
514 pub fn from_script(module_name: impl Into<String>, script: impl AsRef<str>) -> Self {
515 let script = script.as_ref().chars().collect();
516 let mut lexer = Lexer::new(module_name);
517 lexer.set_chars(script);
518 lexer
519 }
520}