1use crate::lexer::codes::{is_token_type, token_type};
2use crate::lexer::cursor::{Cursor, CursorItem};
3use crate::lexer::error::LexerResult;
4use crate::lexer::token::{
5 Bracket, ComparisonOperator, Identifier, LogicalOperator, Operator, Token,
6 TokenKind,
7};
8use crate::lexer::{LexerError, QuotationMark, TemplateString};
9use std::str::FromStr;
10
11#[derive(Debug, Default)]
14pub struct Lexer<'arena> {
15 tokens: Vec<Token<'arena>>, }
17
18impl<'arena> Lexer<'arena> {
19 pub fn new() -> Self {
21 Self::default()
22 }
23
24 pub fn tokenize(
27 &mut self,
28 source: &'arena str,
29 ) -> LexerResult<&[Token<'arena>]> {
30 self.tokens.clear();
31
32 Scanner::new(source, &mut self.tokens).scan()?;
33 Ok(&self.tokens)
34 }
35}
36
37struct Scanner<'arena, 'self_ref> {
40 cursor: Cursor<'arena>, tokens: &'self_ref mut Vec<Token<'arena>>, source: &'arena str, }
44
45impl<'arena, 'self_ref> Scanner<'arena, 'self_ref> {
46 pub fn new(
48 source: &'arena str,
49 tokens: &'self_ref mut Vec<Token<'arena>>,
50 ) -> Self {
51 Self { cursor: Cursor::from(source), source, tokens }
52 }
53
54 pub fn scan(&mut self) -> LexerResult<()> {
57 while let Some(cursor_item) = self.cursor.peek() {
58 self.scan_cursor_item(cursor_item)?;
59 }
60
61 Ok(())
62 }
63
64 pub(crate) fn scan_cursor_item(
67 &mut self,
68 cursor_item: CursorItem,
69 ) -> LexerResult<()> {
70 let (i, s) = cursor_item;
71
72 match s {
73 token_type!("space") => {
75 self.cursor.next();
76 Ok(())
77 },
78 '\'' => self.string(QuotationMark::SingleQuote), '"' => self.string(QuotationMark::DoubleQuote), token_type!("digit") => self.number(), token_type!("bracket") => self.bracket(), token_type!("cmp_operator") => self.cmp_operator(), token_type!("operator") => self.operator(), token_type!("question_mark") => self.question_mark(), '`' => self.template_string(), '.' => self.dot(), token_type!("alpha") => self.identifier(), _ => Err(LexerError::UnmatchedSymbol {
89 symbol: s,
91 position: i as u32,
92 }),
93 }
94 }
95
96 fn next(&self) -> LexerResult<CursorItem> {
99 self.cursor.next().ok_or_else(|| {
100 let (a, b) = self.cursor.peek_back().unwrap_or((0, ' '));
101
102 LexerError::UnexpectedEof { symbol: b, position: a as u32 }
103 })
104 }
105
106 fn push(
108 &mut self,
109 token: Token<'arena>,
110 ) {
111 self.tokens.push(token);
112 }
113
114 fn template_string(&mut self) -> LexerResult<()> {
117 let (start, _) = self.next()?;
118
119 self.tokens.push(Token {
121 kind: TokenKind::QuotationMark(QuotationMark::Backtick),
122 span: (start as u32, (start + 1) as u32),
123 value: QuotationMark::Backtick.into(),
124 });
125
126 let mut in_expression = false; let mut str_start = start + 1; loop {
129 let (e, c) = self.next()?;
130
131 match (c, in_expression) {
132 ('`', _) => {
134 if str_start < e {
135 self.tokens.push(Token {
137 kind: TokenKind::Literal,
138 span: (str_start as u32, e as u32),
139 value: &self.source[str_start..e],
140 });
141 }
142
143 self.tokens.push(Token {
145 kind: TokenKind::QuotationMark(QuotationMark::Backtick),
146 span: (e as u32, (e + 1) as u32),
147 value: QuotationMark::Backtick.into(),
148 });
149
150 break;
151 },
152 ('$', false) => {
154 in_expression = self.cursor.next_if_is("{");
155 if in_expression {
156 self.tokens.push(Token {
158 kind: TokenKind::Literal,
159 span: (str_start as u32, e as u32),
160 value: &self.source[str_start..e],
161 });
162
163 self.tokens.push(Token {
165 kind: TokenKind::TemplateString(
166 TemplateString::ExpressionStart,
167 ),
168 span: (e as u32, (e + 2) as u32),
169 value: TemplateString::ExpressionStart.into(),
170 });
171 }
172 },
173 ('}', true) => {
175 in_expression = false;
176 self.tokens.push(Token {
177 kind: TokenKind::TemplateString(
178 TemplateString::ExpressionEnd,
179 ),
180 span: (str_start as u32, e as u32),
181 value: TemplateString::ExpressionEnd.into(),
182 });
183
184 str_start = e + 1;
185 },
186 (_, false) => {
188 },
190 (_, true) => {
192 self.cursor.back();
193 self.scan_cursor_item((e, c))?;
194 },
195 }
196 }
197
198 Ok(())
199 }
200
201 fn string(
203 &mut self,
204 quote_kind: QuotationMark,
205 ) -> LexerResult<()> {
206 let (start, opener) = self.next()?;
207 let end: usize;
208
209 loop {
211 let (e, c) = self.next()?;
212 if c == opener {
213 end = e;
214 break;
215 }
216 }
217
218 self.push(Token {
220 kind: TokenKind::QuotationMark(quote_kind),
221 span: (start as u32, (start + 1) as u32),
222 value: quote_kind.into(),
223 });
224
225 self.push(Token {
227 kind: TokenKind::Literal,
228 span: ((start + 1) as u32, end as u32),
229 value: &self.source[start + 1..end],
230 });
231
232 self.push(Token {
234 kind: TokenKind::QuotationMark(quote_kind),
235 span: (end as u32, (end + 1) as u32),
236 value: quote_kind.into(),
237 });
238
239 Ok(())
240 }
241
242 fn number(&mut self) -> LexerResult<()> {
245 let (start, _) = self.next()?;
246 let mut end = start;
247 let mut fractal = false; while let Some((e, c)) = self
251 .cursor
252 .next_if(|c| is_token_type!(c, "digit") || c == '_' || c == '.')
253 {
254 if fractal && c == '.' {
256 self.cursor.back();
257 break;
258 }
259
260 if c == '.' {
261 if let Some((_, p)) = self.cursor.peek() {
263 if p == '.' {
264 self.cursor.back();
265 break;
266 }
267
268 fractal = true
269 }
270 }
271
272 end = e;
273 }
274
275 if let Some((e_pos, _)) = self.cursor.next_if(|c| c == 'e') {
277 end = e_pos;
278
279 if let Some((sign_pos, _)) =
281 self.cursor.next_if(|c| c == '+' || c == '-')
282 {
283 end = sign_pos;
284 }
285
286 let mut has_exponent_digits = false;
288 while let Some((exp_pos, _)) =
289 self.cursor.next_if(|c| is_token_type!(c, "digit"))
290 {
291 end = exp_pos;
292 has_exponent_digits = true;
293 }
294
295 if !has_exponent_digits {
297 while self.cursor.position() > e_pos {
298 self.cursor.back();
299 }
300
301 end = e_pos - 1;
302 }
303 }
304
305 self.push(Token {
307 kind: TokenKind::Number,
308 span: (start as u32, (end + 1) as u32),
309 value: &self.source[start..=end],
310 });
311
312 Ok(())
313 }
314
315 fn bracket(&mut self) -> LexerResult<()> {
317 let (start, _) = self.next()?;
318
319 let value = &self.source[start..=start];
320 let span = (start as u32, (start + 1) as u32);
321 self.push(Token {
322 kind: TokenKind::Bracket(Bracket::from_str(value).map_err(
323 |_| LexerError::UnexpectedSymbol {
324 symbol: value.to_string(),
325 span,
326 },
327 )?),
328 span,
329 value,
330 });
331
332 Ok(())
333 }
334
335 fn dot(&mut self) -> LexerResult<()> {
338 let (start, _) = self.next()?;
339 let mut end = start;
340
341 if self.cursor.next_if(|c| c == '.').is_some() {
343 end += 1;
344 }
345
346 let value = &self.source[start..=end];
347 let span = (start as u32, (end + 1) as u32);
348 self.push(Token {
349 kind: TokenKind::Operator(Operator::from_str(value).map_err(
350 |_| LexerError::UnexpectedSymbol {
351 symbol: value.to_string(),
352 span,
353 },
354 )?),
355 span,
356 value,
357 });
358
359 Ok(())
360 }
361
362 fn cmp_operator(&mut self) -> LexerResult<()> {
365 let (start, _) = self.next()?;
366 let mut end = start;
367
368 if self.cursor.next_if(|c| c == '=').is_some() {
370 end += 1;
371 }
372
373 let value = &self.source[start..=end];
374 self.push(Token {
375 kind: TokenKind::Operator(Operator::from_str(value).map_err(
376 |_| LexerError::UnexpectedSymbol {
377 symbol: value.to_string(),
378 span: (start as u32, (end + 1) as u32),
379 },
380 )?),
381 span: (start as u32, (end + 1) as u32),
382 value,
383 });
384
385 Ok(())
386 }
387
388 fn question_mark(&mut self) -> LexerResult<()> {
391 let (start, _) = self.next()?;
392 let mut kind = TokenKind::Operator(Operator::QuestionMark);
393 let mut end = start;
394
395 if self.cursor.next_if(|c| c == '?').is_some() {
397 kind = TokenKind::Operator(Operator::Logical(
398 LogicalOperator::NullishCoalescing,
399 ));
400 end += 1;
401 }
402
403 let value = &self.source[start..=end];
404 self.push(Token {
405 kind,
406 value,
407 span: (start as u32, (end + 1) as u32),
408 });
409
410 Ok(())
411 }
412
413 fn operator(&mut self) -> LexerResult<()> {
416 let (start, _) = self.next()?;
417
418 let value = &self.source[start..=start];
419 let span = (start as u32, (start + 1) as u32);
420 self.push(Token {
421 kind: TokenKind::Operator(Operator::from_str(value).map_err(
422 |_| LexerError::UnexpectedSymbol {
423 symbol: value.to_string(),
424 span,
425 },
426 )?),
427 span,
428 value,
429 });
430
431 Ok(())
432 }
433
434 fn not(
437 &mut self,
438 start: usize,
439 ) -> LexerResult<()> {
440 if self.cursor.next_if_is(" in ") {
441 let end = self.cursor.position();
443
444 self.push(Token {
445 kind: TokenKind::Operator(Operator::Comparison(
446 ComparisonOperator::NotIn,
447 )),
448 span: (start as u32, (end - 1) as u32),
449 value: "not in",
450 })
451 } else {
452 let end = self.cursor.position();
454
455 self.push(Token {
456 kind: TokenKind::Operator(Operator::Logical(
457 LogicalOperator::Not,
458 )),
459 span: (start as u32, end as u32),
460 value: "not",
461 })
462 }
463
464 Ok(())
465 }
466
467 fn identifier(&mut self) -> LexerResult<()> {
470 let (start, _) = self.next()?;
471 let mut end = start;
472
473 while let Some((e, _)) =
475 self.cursor.next_if(|c| is_token_type!(c, "alphanumeric"))
476 {
477 end = e;
478 }
479
480 let value = &self.source[start..=end];
481 match value {
482 "and" => self.push(Token {
484 kind: TokenKind::Operator(Operator::Logical(
485 LogicalOperator::And,
486 )),
487 span: (start as u32, (end + 1) as u32),
488 value,
489 }),
490 "or" => self.push(Token {
491 kind: TokenKind::Operator(Operator::Logical(
492 LogicalOperator::Or,
493 )),
494 span: (start as u32, (end + 1) as u32),
495 value,
496 }),
497 "in" => self.push(Token {
499 kind: TokenKind::Operator(Operator::Comparison(
500 ComparisonOperator::In,
501 )),
502 span: (start as u32, (end + 1) as u32),
503 value,
504 }),
505 "true" => self.push(Token {
507 kind: TokenKind::Boolean(true),
508 span: (start as u32, (end + 1) as u32),
509 value,
510 }),
511 "false" => self.push(Token {
512 kind: TokenKind::Boolean(false),
513 span: (start as u32, (end + 1) as u32),
514 value,
515 }),
516 "not" => self.not(start)?,
518 _ => self.push(Token {
520 kind: Identifier::try_from(value)
521 .map(|identifier| TokenKind::Identifier(identifier))
522 .unwrap_or(TokenKind::Literal),
523 span: (start as u32, (end + 1) as u32),
524 value,
525 }),
526 }
527
528 Ok(())
529 }
530}