1use crate::lexer::token::Token;
2use crate::lexer::token::TokenType::*;
3use crate::lexer::token::{LiteralValue, TokenType};
4use miette::{miette, LabeledSpan, Report, SourceSpan};
5use owo_colors::OwoColorize;
6use std::collections::HashMap;
7use std::convert::From;
8use std::fmt::Display;
9use std::sync::Arc;
10
/// Hand-written scanner that turns source text into a list of [`Token`]s.
pub struct Lexer {
    /// File name shown in the `file:line:offset` prefix of diagnostics.
    file_name: String,
    /// Full source text; a clone of this `Arc` is stored in every token and
    /// attached to every diagnostic that renders a snippet.
    source: Arc<str>,

    /// Tokens produced so far, in source order.
    pub(super) tokens: Vec<Token>,

    /// Offset in `source` where the lexeme currently being scanned begins.
    start: usize,
    /// Offset in `source` of the next character to be read.
    current: usize,
    /// Line counter, starting at 1; copied into tokens and diagnostics.
    line: usize,

    /// Lookup table from reserved word to the token type it produces.
    keywords: HashMap<&'static str, TokenType>,
}
23
24impl Lexer {
25 pub fn new(input: impl Into<Arc<str>>, file_name: String) -> Self {
26 Self {
27 file_name,
28 source: input.into(),
29 tokens: Vec::new(),
30 start: 0,
31 current: 0,
32 line: 1,
33 keywords: crate::lexer::token::get_keywords_hashmap(),
34 }
35 }
36
37 pub fn scan(
38 input: impl Into<Arc<str>>,
39 file_name: String,
40 ) -> miette::Result<Vec<Token>, Vec<Report>> {
41 let mut lexer = Self::new(input, file_name);
42 let tokens = lexer.scan_tokens()?;
43
44 Ok(tokens)
45 }
46
47 pub fn scan_tokens(&mut self) -> miette::Result<Vec<Token>, Vec<Report>> {
48 let mut errors: Vec<Report> = vec![];
49 while !self.is_at_end() {
50 self.start = self.current;
52 match self.scan_token() {
53 Ok(_) => (),
54 Err(msg) => errors.push(msg),
55 }
56 }
57
58 self.tokens.push(Token {
60 token_type: Eof,
61 lexeme: "<EOF>".to_string(),
62 literal: None,
63 span: SourceSpan::new(self.start.into(), 0usize),
64 line_number: self.line,
65 source: self.source.clone(), });
67
68 if !errors.is_empty() {
69 return Err(errors);
70 }
71
72 Ok(self.tokens.clone())
73 }
74
75 fn is_at_end(&self) -> bool {
76 self.current >= self.source.len()
77 }
78
79 fn scan_token(&mut self) -> miette::Result<()> {
80 let c = self.advance();
81
82 match c {
83 '(' => self.add_token(LeftParen),
84 ')' => self.add_token(RightParen),
85 '[' => self.add_token(LeftBracket),
86 ']' => self.add_token(RightBracket),
87 '{' => self.add_token(LeftBrace),
88 '}' => self.add_token(RightBrace),
89 ',' => self.add_token(Comma),
90 '.' => self.add_token(Dot),
91 '-' => self.add_token(Minus),
92 '+' => self.add_token(Plus),
93 '*' => self.add_token(Star),
94 ';' => self.add_token(SoftSemi),
95 '!' => {
96 if self.char_match('=') {
97 self.add_token(BangEqual)
98 } else {
99 let labels = vec![LabeledSpan::at(
100 self.current_span(),
101 "operator `!` (bang) not allowed in syntax",
102 )];
103 let error = miette!(
104 labels = labels,
105 code = "lexer::unknown_symbol::bang",
106 help = "for logical not write `NOT` instead of `!`",
107 "{} unknown symbol `!`",
108 self.location_string()
109 )
110 .with_source_code(self.source.clone());
111
112 return Err(error);
113 }
114 }
115 '=' => {
116 if self.char_match('=') {
117 self.add_token(EqualEqual)
118 } else {
119 let labels = vec![LabeledSpan::at(
120 self.current_span(),
121 "operator `=` (equals) not allowed in syntax",
122 )];
123 let error = miette!(
124 labels = labels,
125 code = "lexer::unknown_symbol::equals",
126 help = "for logical equals write `==` instead of `=`\n\
127 to assign to a variable write `<-` instead of `=`",
128 "{} unknown symbol `=`",
129 self.location_string()
130 )
131 .with_source_code(self.source.clone());
132
133 return Err(error);
134 }
135 }
136 '<' => {
137 let token = if self.char_match('=') {
138 LessEqual
139 } else if self.char_match('-') {
140 Arrow
141 } else {
142 Less
143 };
144
145 self.add_token(token)
146 }
147 '>' => {
148 let token = if self.char_match('=') {
149 GreaterEqual
150 } else {
151 Greater
152 };
153
154 self.add_token(token)
155 }
156 '/' => {
157 if self.char_match('/') {
158 loop {
160 if self.peek() == '\n' || self.is_at_end() {
161 break;
162 }
163 self.advance();
164 }
165 } else {
166 self.add_token(Slash)
167 }
168 },
169 '\\' => {
170 if !self.char_match('\n') {
171 let labels = vec![LabeledSpan::at(
172 self.current_span(),
173 "must be followed by newline (\\n)",
174 )];
175
176 let error = miette!(
177 labels = labels,
178 help = "use \\ to escape a newline",
179 "expected newline (\\n) following \\ instead found {}",
180 self.peek()
181 ).with_source_code(self.source.clone());
182
183 return Err(error);
184 }
185 },
186 ' ' | '\r' | '\t' => { }
187 '\n' => {
188 if let Some(prev) = self.tokens.last() {
189 self.line += 1;
190 match prev.token_type {
193 Identifier | Number | StringLiteral | Null | True | False | Break | Continue | Return |
196 RightParen | RightBracket | RightBrace
197 => {
198 self.add_token(SoftSemi)
199 }
200 _ => {}
202 }
203 };
204 }
205 '"' => self.string()?,
206 ch if ch.is_ascii_digit() => self.number()?,
207 ch if ch.is_alphanumeric() => self.identifier(),
208 ch => {
209 let labels = vec![LabeledSpan::at(
210 self.current_span(),
211 format!("symbol `{ch}` is not allowed in syntax"),
212 )];
213
214 let error = miette!(
215 labels = labels,
216 code = "lexer::unknown_symbol",
217 "{} unknown symbol `{ch}`",
218 self.location_string()
219 )
220 .with_source_code(self.source.clone());
221
222 return Err(error);
223 }
224 }
225
226 Ok(())
227 }
228
229 fn string(&mut self) -> miette::Result<()> {
230 let mut result = String::new();
231
232 while self.peek() != '"' && !self.is_at_end() {
233 if self.peek() == '\n' {
234 self.line += 1;
235 }
236
237 if self.peek() == '\\' {
239 self.advance(); match self.peek() {
242 'n' => {
243 result.push('\n');
244 self.advance(); }
246 'r' => {
247 result.push('\r');
248 self.advance(); }
250 't' => {
251 result.push('\t');
252 self.advance(); }
254 '\\' => {
255 result.push('\\');
256 self.advance(); }
258 '"' => {
259 result.push('"');
260 self.advance(); }
262 _ => {
263 return Err(miette!("Invalid escape sequence: \\{}", self.peek()));
265 }
266 }
267 } else {
268 result.push(self.advance()); }
270 }
271
272 if self.is_at_end() {
274 let labels = vec![
275 LabeledSpan::at_offset(self.start, "unmatched quote"),
276 LabeledSpan::at(self.current_span(), "unmatched quote"),
277 ];
278
279 let error = miette!(
280 labels = labels,
281 code = "lexer::unterminated_string",
282 help = "A string literal must end with a matching quote",
283 "{} unterminated string",
284 self.location_string()
285 )
286 .with_source_code(self.source.clone());
287
288 return Err(error);
289 }
290
291 self.advance(); self.add_token_lit(StringLiteral, Some(LiteralValue::String(result)));
295
296 Ok(())
297 }
298
299 fn number(&mut self) -> miette::Result<()> {
300 while self.peek().is_ascii_digit() {
301 self.advance();
302 }
303
304 if self.peek() == '.' && self.peek_advance().is_ascii_digit() {
305 self.advance();
306
307 while self.peek().is_ascii_digit() {
308 self.advance();
309 }
310 }
311 let substring = &self.source[self.start..self.current];
312 let value = substring.parse::<f64>();
313
314 match value {
315 Ok(value) => self.add_token_lit(Number, Some(LiteralValue::Number(value))),
316 Err(_) => {
317 let labels = vec![LabeledSpan::at(self.current_span(), "could not parse")];
318
319 let error = miette!(
320 labels = labels,
321 code = "lexer::unknown_token",
322 help = "this token might not be a valid number",
323 "{} failed to parse `{}` into number",
324 self.location_string(),
325 substring
326 )
327 .with_source_code(self.source.clone());
328
329 return Err(error);
330 }
331 }
332
333 Ok(())
334 }
335
336 fn identifier(&mut self) {
337 while self.peek().is_alphanumeric() || self.peek() == '_' {
338 self.advance();
339 }
340 let substring = &self.source[self.start..self.current];
341 if let Some(keyword_token_type) = self.keywords.get(substring) {
342 self.add_token(keyword_token_type.clone());
343 } else {
344 self.add_token(Identifier)
345 }
346 }
347
348 fn peek_advance(&self) -> char {
349 if self.current + 1 >= self.source.len() {
350 return '\0';
351 }
352
353 self.source.chars().nth(self.current + 1).unwrap()
354 }
355 fn peek(&self) -> char {
356 if self.is_at_end() {
357 return '\0';
358 }
359 self.source.chars().nth(self.current).unwrap()
360 }
361
362 fn advance(&mut self) -> char {
363 let c = self.source.chars().nth(self.current).unwrap();
364 self.current += 1;
365
366 c
367 }
368
369 fn check_next(&self, ch: char) -> bool {
370 if self.is_at_end() {
371 return false;
372 }
373
374 let mut i = 1;
375 loop {
376 let next_char = self.source.chars().nth(self.current + i);
377
378 match next_char {
379 None => {
381 break false;
382 }
383 Some(next_char) => {
384 if next_char.is_whitespace() {
385 i += 1;
386 } else {
387 return next_char == ch;
388 }
389 }
390 }
391 }
392 }
393
394 fn add_token(&mut self, token_type: TokenType) {
395 self.add_token_lit(token_type, None)
396 }
397
398 fn add_token_lit(&mut self, token_type: TokenType, literal: Option<LiteralValue>) {
399 let text = self
400 .source
401 .get(self.start..self.current)
402 .expect("Internal Compiler Error, This is a BUG")
403 .to_string();
404
405 let span_len = self.current - self.start;
406
407 self.tokens.push(Token {
408 token_type,
409 lexeme: text,
410 literal,
411 line_number: self.line,
412 span: SourceSpan::new(self.start.into(), span_len),
413 source: self.source.clone(), });
415 }
416
417 fn char_match(&mut self, ch: char) -> bool {
418 if self.is_at_end() {
419 return false;
420 }
421
422 if self.source.chars().nth(self.current).unwrap() != ch {
423 false
424 } else {
425 self.current += 1;
426 true
427 }
428 }
429
430 fn current_span(&self) -> SourceSpan {
431 SourceSpan::from(self.start..self.current)
432 }
433
434 fn location_string(&self) -> impl Display {
436 let string = format!("{}:{}:{}", self.file_name, self.line, self.start);
437 let string = string.bold();
438 let string = string.red();
439 format!("{string}")
440 }
441}