1use crate::lexer::codes::{is_token_type, token_type};
2use crate::lexer::cursor::{Cursor, CursorItem};
3use crate::lexer::error::LexerResult;
4use crate::lexer::token::{
5    Bracket, ComparisonOperator, Identifier, LogicalOperator, Operator, Token, TokenKind,
6};
7use crate::lexer::{LexerError, QuotationMark, TemplateString};
8use std::str::FromStr;
9
10#[derive(Debug, Default)]
11pub struct Lexer<'arena> {
12    tokens: Vec<Token<'arena>>,
13}
14
15impl<'arena> Lexer<'arena> {
16    pub fn new() -> Self {
17        Self::default()
18    }
19
20    pub fn tokenize(&mut self, source: &'arena str) -> LexerResult<&[Token<'arena>]> {
21        self.tokens.clear();
22
23        Scanner::new(source, &mut self.tokens).scan()?;
24        Ok(&self.tokens)
25    }
26}
27
28struct Scanner<'arena, 'self_ref> {
29    cursor: Cursor<'arena>,
30    tokens: &'self_ref mut Vec<Token<'arena>>,
31    source: &'arena str,
32}
33
34impl<'arena, 'self_ref> Scanner<'arena, 'self_ref> {
35    pub fn new(source: &'arena str, tokens: &'self_ref mut Vec<Token<'arena>>) -> Self {
36        Self {
37            cursor: Cursor::from(source),
38            source,
39            tokens,
40        }
41    }
42
43    pub fn scan(&mut self) -> LexerResult<()> {
44        while let Some(cursor_item) = self.cursor.peek() {
45            self.scan_cursor_item(cursor_item)?;
46        }
47
48        Ok(())
49    }
50
51    pub(crate) fn scan_cursor_item(&mut self, cursor_item: CursorItem) -> LexerResult<()> {
52        let (i, s) = cursor_item;
53
54        match s {
55            token_type!("space") => {
56                self.cursor.next();
57                Ok(())
58            }
59            '\'' => self.string(QuotationMark::SingleQuote),
60            '"' => self.string(QuotationMark::DoubleQuote),
61            token_type!("digit") => self.number(),
62            token_type!("bracket") => self.bracket(),
63            token_type!("cmp_operator") => self.cmp_operator(),
64            token_type!("operator") => self.operator(),
65            token_type!("question_mark") => self.question_mark(),
66            '=' => self.equals(),
67            '`' => self.template_string(),
68            '.' => self.dot(),
69            ';' => self.semi(),
70            token_type!("alpha") => self.identifier(),
71            _ => Err(LexerError::UnmatchedSymbol {
72                symbol: s,
73                position: i as u32,
74            }),
75        }
76    }
77
78    fn next(&self) -> LexerResult<CursorItem> {
79        self.cursor.next().ok_or_else(|| {
80            let (a, b) = self.cursor.peek_back().unwrap_or((0, ' '));
81
82            LexerError::UnexpectedEof {
83                symbol: b,
84                position: a as u32,
85            }
86        })
87    }
88
89    fn push(&mut self, token: Token<'arena>) {
90        self.tokens.push(token);
91    }
92
93    fn template_string(&mut self) -> LexerResult<()> {
94        let (start, _) = self.next()?;
95
96        self.tokens.push(Token {
97            kind: TokenKind::QuotationMark(QuotationMark::Backtick),
98            span: (start as u32, (start + 1) as u32),
99            value: QuotationMark::Backtick.into(),
100        });
101
102        let mut in_expression = false;
103        let mut str_start = start + 1;
104        loop {
105            let (e, c) = self.next()?;
106
107            match (c, in_expression) {
108                ('`', _) => {
109                    if str_start < e {
110                        self.tokens.push(Token {
111                            kind: TokenKind::Literal,
112                            span: (str_start as u32, e as u32),
113                            value: &self.source[str_start..e],
114                        });
115                    }
116
117                    self.tokens.push(Token {
118                        kind: TokenKind::QuotationMark(QuotationMark::Backtick),
119                        span: (e as u32, (e + 1) as u32),
120                        value: QuotationMark::Backtick.into(),
121                    });
122
123                    break;
124                }
125                ('$', false) => {
126                    in_expression = self.cursor.next_if_is("{");
127                    if in_expression {
128                        self.tokens.push(Token {
129                            kind: TokenKind::Literal,
130                            span: (str_start as u32, e as u32),
131                            value: &self.source[str_start..e],
132                        });
133
134                        self.tokens.push(Token {
135                            kind: TokenKind::TemplateString(TemplateString::ExpressionStart),
136                            span: (e as u32, (e + 2) as u32),
137                            value: TemplateString::ExpressionStart.into(),
138                        });
139                    }
140                }
141                ('}', true) => {
142                    in_expression = false;
143                    self.tokens.push(Token {
144                        kind: TokenKind::TemplateString(TemplateString::ExpressionEnd),
145                        span: (str_start as u32, e as u32),
146                        value: TemplateString::ExpressionEnd.into(),
147                    });
148
149                    str_start = e + 1;
150                }
151                (_, false) => {
152                    }
154                (_, true) => {
155                    self.cursor.back();
156                    self.scan_cursor_item((e, c))?;
157                }
158            }
159        }
160
161        Ok(())
162    }
163
164    fn string(&mut self, quote_kind: QuotationMark) -> LexerResult<()> {
165        let (start, opener) = self.next()?;
166        let end: usize;
167
168        loop {
169            let (e, c) = self.next()?;
170            if c == opener {
171                end = e;
172                break;
173            }
174        }
175
176        self.push(Token {
177            kind: TokenKind::QuotationMark(quote_kind),
178            span: (start as u32, (start + 1) as u32),
179            value: quote_kind.into(),
180        });
181
182        self.push(Token {
183            kind: TokenKind::Literal,
184            span: ((start + 1) as u32, end as u32),
185            value: &self.source[start + 1..end],
186        });
187
188        self.push(Token {
189            kind: TokenKind::QuotationMark(quote_kind),
190            span: (end as u32, (end + 1) as u32),
191            value: quote_kind.into(),
192        });
193
194        Ok(())
195    }
196
197    fn number(&mut self) -> LexerResult<()> {
198        let (start, _) = self.next()?;
199        let mut end = start;
200        let mut fractal = false;
201
202        while let Some((e, c)) = self
203            .cursor
204            .next_if(|c| is_token_type!(c, "digit") || c == '_' || c == '.')
205        {
206            if fractal && c == '.' {
207                self.cursor.back();
208                break;
209            }
210
211            if c == '.' {
212                if let Some((_, p)) = self.cursor.peek() {
213                    if p == '.' {
214                        self.cursor.back();
215                        break;
216                    }
217
218                    fractal = true
219                }
220            }
221
222            end = e;
223        }
224
225        if let Some((e_pos, _)) = self.cursor.next_if(|c| c == 'e') {
226            end = e_pos;
227
228            if let Some((sign_pos, _)) = self.cursor.next_if(|c| c == '+' || c == '-') {
229                end = sign_pos;
230            }
231
232            let mut has_exponent_digits = false;
233            while let Some((exp_pos, _)) = self.cursor.next_if(|c| is_token_type!(c, "digit")) {
234                end = exp_pos;
235                has_exponent_digits = true;
236            }
237
238            if !has_exponent_digits {
239                while self.cursor.position() > e_pos {
240                    self.cursor.back();
241                }
242
243                end = e_pos - 1;
244            }
245        }
246
247        self.push(Token {
248            kind: TokenKind::Number,
249            span: (start as u32, (end + 1) as u32),
250            value: &self.source[start..=end],
251        });
252
253        Ok(())
254    }
255
256    fn bracket(&mut self) -> LexerResult<()> {
257        let (start, _) = self.next()?;
258
259        let value = &self.source[start..=start];
260        let span = (start as u32, (start + 1) as u32);
261        self.push(Token {
262            kind: TokenKind::Bracket(Bracket::from_str(value).map_err(|_| {
263                LexerError::UnexpectedSymbol {
264                    symbol: value.to_string(),
265                    span,
266                }
267            })?),
268            span,
269            value,
270        });
271
272        Ok(())
273    }
274
275    fn dot(&mut self) -> LexerResult<()> {
276        let (start, _) = self.next()?;
277        let mut end = start;
278
279        if self.cursor.next_if(|c| c == '.').is_some() {
280            end += 1;
281        }
282
283        let value = &self.source[start..=end];
284        let span = (start as u32, (end + 1) as u32);
285        self.push(Token {
286            kind: TokenKind::Operator(Operator::from_str(value).map_err(|_| {
287                LexerError::UnexpectedSymbol {
288                    symbol: value.to_string(),
289                    span,
290                }
291            })?),
292            span,
293            value,
294        });
295
296        Ok(())
297    }
298
299    fn cmp_operator(&mut self) -> LexerResult<()> {
300        let (start, _) = self.next()?;
301        let mut end = start;
302
303        if self.cursor.next_if(|c| c == '=').is_some() {
304            end += 1;
305        }
306
307        let value = &self.source[start..=end];
308        self.push(Token {
309            kind: TokenKind::Operator(Operator::from_str(value).map_err(|_| {
310                LexerError::UnexpectedSymbol {
311                    symbol: value.to_string(),
312                    span: (start as u32, (end + 1) as u32),
313                }
314            })?),
315            span: (start as u32, (end + 1) as u32),
316            value,
317        });
318
319        Ok(())
320    }
321
322    fn semi(&mut self) -> LexerResult<()> {
323        let (start, _) = self.next()?;
324        self.push(Token {
325            kind: TokenKind::Operator(Operator::Semi),
326            span: (start as u32, (start + 1) as u32),
327            value: &self.source[start..=start],
328        });
329
330        Ok(())
331    }
332
333    fn equals(&mut self) -> LexerResult<()> {
334        let (start, _) = self.next()?;
335        let Some((end, _)) = self.cursor.next_if(|c| c == '=') else {
336            self.push(Token {
337                kind: TokenKind::Operator(Operator::Assign),
338                span: (start as u32, (start + 1) as u32),
339                value: &self.source[start..=start],
340            });
341
342            return Ok(());
343        };
344
345        self.push(Token {
346            kind: TokenKind::Operator(Operator::Comparison(ComparisonOperator::Equal)),
347            span: (start as u32, (end + 1) as u32),
348            value: &self.source[start..=end],
349        });
350
351        Ok(())
352    }
353
354    fn question_mark(&mut self) -> LexerResult<()> {
355        let (start, _) = self.next()?;
356        let mut kind = TokenKind::Operator(Operator::QuestionMark);
357        let mut end = start;
358
359        if self.cursor.next_if(|c| c == '?').is_some() {
360            kind = TokenKind::Operator(Operator::Logical(LogicalOperator::NullishCoalescing));
361            end += 1;
362        }
363
364        let value = &self.source[start..=end];
365        self.push(Token {
366            kind,
367            value,
368            span: (start as u32, (end + 1) as u32),
369        });
370
371        Ok(())
372    }
373
374    fn operator(&mut self) -> LexerResult<()> {
375        let (start, _) = self.next()?;
376
377        let value = &self.source[start..=start];
378        let span = (start as u32, (start + 1) as u32);
379        self.push(Token {
380            kind: TokenKind::Operator(Operator::from_str(value).map_err(|_| {
381                LexerError::UnexpectedSymbol {
382                    symbol: value.to_string(),
383                    span,
384                }
385            })?),
386            span,
387            value,
388        });
389
390        Ok(())
391    }
392
393    fn not(&mut self, start: usize) -> LexerResult<()> {
394        if self.cursor.next_if_is(" in ") {
395            let end = self.cursor.position();
396
397            self.push(Token {
398                kind: TokenKind::Operator(Operator::Comparison(ComparisonOperator::NotIn)),
399                span: (start as u32, (end - 1) as u32),
400                value: "not in",
401            })
402        } else {
403            let end = self.cursor.position();
404
405            self.push(Token {
406                kind: TokenKind::Operator(Operator::Logical(LogicalOperator::Not)),
407                span: (start as u32, end as u32),
408                value: "not",
409            })
410        }
411
412        Ok(())
413    }
414
415    fn identifier(&mut self) -> LexerResult<()> {
416        let (start, _) = self.next()?;
417        let mut end = start;
418
419        while let Some((e, _)) = self.cursor.next_if(|c| is_token_type!(c, "alphanumeric")) {
420            end = e;
421        }
422
423        let value = &self.source[start..=end];
424        match value {
425            "and" => self.push(Token {
426                kind: TokenKind::Operator(Operator::Logical(LogicalOperator::And)),
427                span: (start as u32, (end + 1) as u32),
428                value,
429            }),
430            "or" => self.push(Token {
431                kind: TokenKind::Operator(Operator::Logical(LogicalOperator::Or)),
432                span: (start as u32, (end + 1) as u32),
433                value,
434            }),
435            "in" => self.push(Token {
436                kind: TokenKind::Operator(Operator::Comparison(ComparisonOperator::In)),
437                span: (start as u32, (end + 1) as u32),
438                value,
439            }),
440            "true" => self.push(Token {
441                kind: TokenKind::Boolean(true),
442                span: (start as u32, (end + 1) as u32),
443                value,
444            }),
445            "false" => self.push(Token {
446                kind: TokenKind::Boolean(false),
447                span: (start as u32, (end + 1) as u32),
448                value,
449            }),
450            "not" => self.not(start)?,
451            _ => self.push(Token {
452                kind: Identifier::try_from(value)
453                    .map(|identifier| TokenKind::Identifier(identifier))
454                    .unwrap_or(TokenKind::Literal),
455                span: (start as u32, (end + 1) as u32),
456                value,
457            }),
458        }
459
460        Ok(())
461    }
462}