1use crate::lexer::codes::{is_token_type, token_type};
2use crate::lexer::cursor::{Cursor, CursorItem};
3use crate::lexer::error::LexerResult;
4use crate::lexer::token::{
5 Bracket, ComparisonOperator, Identifier, LogicalOperator, Operator, Token, TokenKind,
6};
7use crate::lexer::{LexerError, QuotationMark, TemplateString};
8use std::str::FromStr;
9
10#[derive(Debug, Default)]
11pub struct Lexer<'arena> {
12 tokens: Vec<Token<'arena>>,
13}
14
15impl<'arena> Lexer<'arena> {
16 pub fn new() -> Self {
17 Self::default()
18 }
19
20 pub fn tokenize(&mut self, source: &'arena str) -> LexerResult<&[Token<'arena>]> {
21 self.tokens.clear();
22
23 Scanner::new(source, &mut self.tokens).scan()?;
24 Ok(&self.tokens)
25 }
26}
27
/// Scanning state for a single pass over one source string.
///
/// `'arena` is the lifetime of the source text (token values are slices of
/// it); `'self_ref` is the lifetime of the borrowed output buffer.
struct Scanner<'arena, 'self_ref> {
    /// Character cursor built from `source`; all consumption goes through it.
    cursor: Cursor<'arena>,
    /// Output buffer that scanned tokens are appended to.
    tokens: &'self_ref mut Vec<Token<'arena>>,
    /// The full input text, kept so token values can be sliced out by index.
    source: &'arena str,
}
33
34impl<'arena, 'self_ref> Scanner<'arena, 'self_ref> {
35 pub fn new(source: &'arena str, tokens: &'self_ref mut Vec<Token<'arena>>) -> Self {
36 Self {
37 cursor: Cursor::from(source),
38 source,
39 tokens,
40 }
41 }
42
43 pub fn scan(&mut self) -> LexerResult<()> {
44 while let Some(cursor_item) = self.cursor.peek() {
45 self.scan_cursor_item(cursor_item)?;
46 }
47
48 Ok(())
49 }
50
/// Dispatches on the character of `cursor_item` to the matching sub-scanner.
///
/// Characters in the `space` class are consumed without emitting a token.
/// Every other recognised character is handed to a sub-scanner, each of
/// which begins by consuming one item from the cursor itself — so
/// `cursor_item` is expected to be the item the cursor will yield next.
///
/// # Errors
///
/// Returns `LexerError::UnmatchedSymbol` (with the character and its index)
/// when no arm matches, or whatever error the chosen sub-scanner returns.
pub(crate) fn scan_cursor_item(&mut self, cursor_item: CursorItem) -> LexerResult<()> {
    let (i, s) = cursor_item;

    // Arms are tried top to bottom, so if two character classes overlap the
    // earlier arm wins.
    match s {
        token_type!("space") => {
            // Skip the character; no token is produced for it.
            self.cursor.next();
            Ok(())
        }
        '\'' => self.string(QuotationMark::SingleQuote),
        '"' => self.string(QuotationMark::DoubleQuote),
        token_type!("digit") => self.number(),
        token_type!("bracket") => self.bracket(),
        token_type!("cmp_operator") => self.cmp_operator(),
        token_type!("operator") => self.operator(),
        token_type!("question_mark") => self.question_mark(),
        '=' => self.equals(),
        '`' => self.template_string(),
        '.' => self.dot(),
        ';' => self.semi(),
        token_type!("alpha") => self.identifier(),
        _ => Err(LexerError::UnmatchedSymbol {
            symbol: s,
            position: i as u32,
        }),
    }
}
77
78 fn next(&self) -> LexerResult<CursorItem> {
79 self.cursor.next().ok_or_else(|| {
80 let (a, b) = self.cursor.peek_back().unwrap_or((0, ' '));
81
82 LexerError::UnexpectedEof {
83 symbol: b,
84 position: a as u32,
85 }
86 })
87 }
88
89 fn push(&mut self, token: Token<'arena>) {
90 self.tokens.push(token);
91 }
92
93 fn template_string(&mut self) -> LexerResult<()> {
94 let (start, _) = self.next()?;
95
96 self.tokens.push(Token {
97 kind: TokenKind::QuotationMark(QuotationMark::Backtick),
98 span: (start as u32, (start + 1) as u32),
99 value: QuotationMark::Backtick.into(),
100 });
101
102 let mut in_expression = false;
103 let mut str_start = start + 1;
104 loop {
105 let (e, c) = self.next()?;
106
107 match (c, in_expression) {
108 ('`', _) => {
109 if str_start < e {
110 self.tokens.push(Token {
111 kind: TokenKind::Literal,
112 span: (str_start as u32, e as u32),
113 value: &self.source[str_start..e],
114 });
115 }
116
117 self.tokens.push(Token {
118 kind: TokenKind::QuotationMark(QuotationMark::Backtick),
119 span: (e as u32, (e + 1) as u32),
120 value: QuotationMark::Backtick.into(),
121 });
122
123 break;
124 }
125 ('$', false) => {
126 in_expression = self.cursor.next_if_is("{");
127 if in_expression {
128 self.tokens.push(Token {
129 kind: TokenKind::Literal,
130 span: (str_start as u32, e as u32),
131 value: &self.source[str_start..e],
132 });
133
134 self.tokens.push(Token {
135 kind: TokenKind::TemplateString(TemplateString::ExpressionStart),
136 span: (e as u32, (e + 2) as u32),
137 value: TemplateString::ExpressionStart.into(),
138 });
139 }
140 }
141 ('}', true) => {
142 in_expression = false;
143 self.tokens.push(Token {
144 kind: TokenKind::TemplateString(TemplateString::ExpressionEnd),
145 span: (str_start as u32, e as u32),
146 value: TemplateString::ExpressionEnd.into(),
147 });
148
149 str_start = e + 1;
150 }
151 (_, false) => {
152 }
154 (_, true) => {
155 self.cursor.back();
156 self.scan_cursor_item((e, c))?;
157 }
158 }
159 }
160
161 Ok(())
162 }
163
164 fn string(&mut self, quote_kind: QuotationMark) -> LexerResult<()> {
165 let (start, opener) = self.next()?;
166 let end: usize;
167
168 loop {
169 let (e, c) = self.next()?;
170 if c == opener {
171 end = e;
172 break;
173 }
174 }
175
176 self.push(Token {
177 kind: TokenKind::QuotationMark(quote_kind),
178 span: (start as u32, (start + 1) as u32),
179 value: quote_kind.into(),
180 });
181
182 self.push(Token {
183 kind: TokenKind::Literal,
184 span: ((start + 1) as u32, end as u32),
185 value: &self.source[start + 1..end],
186 });
187
188 self.push(Token {
189 kind: TokenKind::QuotationMark(quote_kind),
190 span: (end as u32, (end + 1) as u32),
191 value: quote_kind.into(),
192 });
193
194 Ok(())
195 }
196
197 fn number(&mut self) -> LexerResult<()> {
198 let (start, _) = self.next()?;
199 let mut end = start;
200 let mut fractal = false;
201
202 while let Some((e, c)) = self
203 .cursor
204 .next_if(|c| is_token_type!(c, "digit") || c == '_' || c == '.')
205 {
206 if fractal && c == '.' {
207 self.cursor.back();
208 break;
209 }
210
211 if c == '.' {
212 if let Some((_, p)) = self.cursor.peek() {
213 if p == '.' {
214 self.cursor.back();
215 break;
216 }
217
218 fractal = true
219 }
220 }
221
222 end = e;
223 }
224
225 if let Some((e_pos, _)) = self.cursor.next_if(|c| c == 'e') {
226 end = e_pos;
227
228 if let Some((sign_pos, _)) = self.cursor.next_if(|c| c == '+' || c == '-') {
229 end = sign_pos;
230 }
231
232 let mut has_exponent_digits = false;
233 while let Some((exp_pos, _)) = self.cursor.next_if(|c| is_token_type!(c, "digit")) {
234 end = exp_pos;
235 has_exponent_digits = true;
236 }
237
238 if !has_exponent_digits {
239 while self.cursor.position() > e_pos {
240 self.cursor.back();
241 }
242
243 end = e_pos - 1;
244 }
245 }
246
247 self.push(Token {
248 kind: TokenKind::Number,
249 span: (start as u32, (end + 1) as u32),
250 value: &self.source[start..=end],
251 });
252
253 Ok(())
254 }
255
256 fn bracket(&mut self) -> LexerResult<()> {
257 let (start, _) = self.next()?;
258
259 let value = &self.source[start..=start];
260 let span = (start as u32, (start + 1) as u32);
261 self.push(Token {
262 kind: TokenKind::Bracket(Bracket::from_str(value).map_err(|_| {
263 LexerError::UnexpectedSymbol {
264 symbol: value.to_string(),
265 span,
266 }
267 })?),
268 span,
269 value,
270 });
271
272 Ok(())
273 }
274
275 fn dot(&mut self) -> LexerResult<()> {
276 let (start, _) = self.next()?;
277 let mut end = start;
278
279 if self.cursor.next_if(|c| c == '.').is_some() {
280 end += 1;
281 }
282
283 let value = &self.source[start..=end];
284 let span = (start as u32, (end + 1) as u32);
285 self.push(Token {
286 kind: TokenKind::Operator(Operator::from_str(value).map_err(|_| {
287 LexerError::UnexpectedSymbol {
288 symbol: value.to_string(),
289 span,
290 }
291 })?),
292 span,
293 value,
294 });
295
296 Ok(())
297 }
298
299 fn cmp_operator(&mut self) -> LexerResult<()> {
300 let (start, _) = self.next()?;
301 let mut end = start;
302
303 if self.cursor.next_if(|c| c == '=').is_some() {
304 end += 1;
305 }
306
307 let value = &self.source[start..=end];
308 self.push(Token {
309 kind: TokenKind::Operator(Operator::from_str(value).map_err(|_| {
310 LexerError::UnexpectedSymbol {
311 symbol: value.to_string(),
312 span: (start as u32, (end + 1) as u32),
313 }
314 })?),
315 span: (start as u32, (end + 1) as u32),
316 value,
317 });
318
319 Ok(())
320 }
321
322 fn semi(&mut self) -> LexerResult<()> {
323 let (start, _) = self.next()?;
324 self.push(Token {
325 kind: TokenKind::Operator(Operator::Semi),
326 span: (start as u32, (start + 1) as u32),
327 value: &self.source[start..=start],
328 });
329
330 Ok(())
331 }
332
333 fn equals(&mut self) -> LexerResult<()> {
334 let (start, _) = self.next()?;
335 let Some((end, _)) = self.cursor.next_if(|c| c == '=') else {
336 self.push(Token {
337 kind: TokenKind::Operator(Operator::Assign),
338 span: (start as u32, (start + 1) as u32),
339 value: &self.source[start..=start],
340 });
341
342 return Ok(());
343 };
344
345 self.push(Token {
346 kind: TokenKind::Operator(Operator::Comparison(ComparisonOperator::Equal)),
347 span: (start as u32, (end + 1) as u32),
348 value: &self.source[start..=end],
349 });
350
351 Ok(())
352 }
353
354 fn question_mark(&mut self) -> LexerResult<()> {
355 let (start, _) = self.next()?;
356 let mut kind = TokenKind::Operator(Operator::QuestionMark);
357 let mut end = start;
358
359 if self.cursor.next_if(|c| c == '?').is_some() {
360 kind = TokenKind::Operator(Operator::Logical(LogicalOperator::NullishCoalescing));
361 end += 1;
362 }
363
364 let value = &self.source[start..=end];
365 self.push(Token {
366 kind,
367 value,
368 span: (start as u32, (end + 1) as u32),
369 });
370
371 Ok(())
372 }
373
374 fn operator(&mut self) -> LexerResult<()> {
375 let (start, _) = self.next()?;
376
377 let value = &self.source[start..=start];
378 let span = (start as u32, (start + 1) as u32);
379 self.push(Token {
380 kind: TokenKind::Operator(Operator::from_str(value).map_err(|_| {
381 LexerError::UnexpectedSymbol {
382 symbol: value.to_string(),
383 span,
384 }
385 })?),
386 span,
387 value,
388 });
389
390 Ok(())
391 }
392
393 fn not(&mut self, start: usize) -> LexerResult<()> {
394 if self.cursor.next_if_is(" in ") {
395 let end = self.cursor.position();
396
397 self.push(Token {
398 kind: TokenKind::Operator(Operator::Comparison(ComparisonOperator::NotIn)),
399 span: (start as u32, (end - 1) as u32),
400 value: "not in",
401 })
402 } else {
403 let end = self.cursor.position();
404
405 self.push(Token {
406 kind: TokenKind::Operator(Operator::Logical(LogicalOperator::Not)),
407 span: (start as u32, end as u32),
408 value: "not",
409 })
410 }
411
412 Ok(())
413 }
414
415 fn identifier(&mut self) -> LexerResult<()> {
416 let (start, _) = self.next()?;
417 let mut end = start;
418
419 while let Some((e, _)) = self.cursor.next_if(|c| is_token_type!(c, "alphanumeric")) {
420 end = e;
421 }
422
423 let value = &self.source[start..=end];
424 match value {
425 "and" => self.push(Token {
426 kind: TokenKind::Operator(Operator::Logical(LogicalOperator::And)),
427 span: (start as u32, (end + 1) as u32),
428 value,
429 }),
430 "or" => self.push(Token {
431 kind: TokenKind::Operator(Operator::Logical(LogicalOperator::Or)),
432 span: (start as u32, (end + 1) as u32),
433 value,
434 }),
435 "in" => self.push(Token {
436 kind: TokenKind::Operator(Operator::Comparison(ComparisonOperator::In)),
437 span: (start as u32, (end + 1) as u32),
438 value,
439 }),
440 "true" => self.push(Token {
441 kind: TokenKind::Boolean(true),
442 span: (start as u32, (end + 1) as u32),
443 value,
444 }),
445 "false" => self.push(Token {
446 kind: TokenKind::Boolean(false),
447 span: (start as u32, (end + 1) as u32),
448 value,
449 }),
450 "not" => self.not(start)?,
451 _ => self.push(Token {
452 kind: Identifier::try_from(value)
453 .map(|identifier| TokenKind::Identifier(identifier))
454 .unwrap_or(TokenKind::Literal),
455 span: (start as u32, (end + 1) as u32),
456 value,
457 }),
458 }
459
460 Ok(())
461 }
462}