zen_expression/lexer/
lexer.rs

1use crate::lexer::codes::{is_token_type, token_type};
2use crate::lexer::cursor::{Cursor, CursorItem};
3use crate::lexer::error::LexerResult;
4use crate::lexer::token::{
5    Bracket, ComparisonOperator, Identifier, LogicalOperator, Operator, Token, TokenKind,
6};
7use crate::lexer::{LexerError, QuotationMark, TemplateString};
8use std::str::FromStr;
9
/// Tokenizer that owns a reusable token buffer.
///
/// The buffer is cleared and refilled on every call to `tokenize`; the
/// produced tokens borrow their text from the `'arena` source string.
#[derive(Debug, Default)]
pub struct Lexer<'arena> {
    // Tokens produced by the most recent `tokenize` call.
    tokens: Vec<Token<'arena>>,
}
14
15impl<'arena> Lexer<'arena> {
16    pub fn new() -> Self {
17        Self::default()
18    }
19
20    pub fn tokenize(&mut self, source: &'arena str) -> LexerResult<&[Token<'arena>]> {
21        self.tokens.clear();
22
23        Scanner::new(source, &mut self.tokens).scan()?;
24        Ok(&self.tokens)
25    }
26}
27
/// Single-use scanning state for one `tokenize` call.
struct Scanner<'arena, 'self_ref> {
    // Cursor over `source`; yields `(index, char)` items.
    cursor: Cursor<'arena>,
    // Output buffer, borrowed from the owning `Lexer`.
    tokens: &'self_ref mut Vec<Token<'arena>>,
    // Full input text; token values are slices of it.
    source: &'arena str,
}
33
34impl<'arena, 'self_ref> Scanner<'arena, 'self_ref> {
35    pub fn new(source: &'arena str, tokens: &'self_ref mut Vec<Token<'arena>>) -> Self {
36        Self {
37            cursor: Cursor::from(source),
38            source,
39            tokens,
40        }
41    }
42
43    pub fn scan(&mut self) -> LexerResult<()> {
44        while let Some(cursor_item) = self.cursor.peek() {
45            self.scan_cursor_item(cursor_item)?;
46        }
47
48        Ok(())
49    }
50
51    pub(crate) fn scan_cursor_item(&mut self, cursor_item: CursorItem) -> LexerResult<()> {
52        let (i, s) = cursor_item;
53
54        match s {
55            token_type!("space") => {
56                self.cursor.next();
57                Ok(())
58            }
59            '\'' => self.string(QuotationMark::SingleQuote),
60            '"' => self.string(QuotationMark::DoubleQuote),
61            token_type!("digit") => self.number(),
62            token_type!("bracket") => self.bracket(),
63            token_type!("cmp_operator") => self.cmp_operator(),
64            token_type!("operator") => self.operator(),
65            token_type!("question_mark") => self.question_mark(),
66            '`' => self.template_string(),
67            '.' => self.dot(),
68            token_type!("alpha") => self.identifier(),
69            _ => Err(LexerError::UnmatchedSymbol {
70                symbol: s,
71                position: i as u32,
72            }),
73        }
74    }
75
76    fn next(&self) -> LexerResult<CursorItem> {
77        self.cursor.next().ok_or_else(|| {
78            let (a, b) = self.cursor.peek_back().unwrap_or((0, ' '));
79
80            LexerError::UnexpectedEof {
81                symbol: b,
82                position: a as u32,
83            }
84        })
85    }
86
    /// Appends `token` to the output buffer.
    fn push(&mut self, token: Token<'arena>) {
        self.tokens.push(token);
    }
90
91    fn template_string(&mut self) -> LexerResult<()> {
92        let (start, _) = self.next()?;
93
94        self.tokens.push(Token {
95            kind: TokenKind::QuotationMark(QuotationMark::Backtick),
96            span: (start as u32, (start + 1) as u32),
97            value: QuotationMark::Backtick.into(),
98        });
99
100        let mut in_expression = false;
101        let mut str_start = start + 1;
102        loop {
103            let (e, c) = self.next()?;
104
105            match (c, in_expression) {
106                ('`', _) => {
107                    if str_start < e {
108                        self.tokens.push(Token {
109                            kind: TokenKind::Literal,
110                            span: (str_start as u32, e as u32),
111                            value: &self.source[str_start..e],
112                        });
113                    }
114
115                    self.tokens.push(Token {
116                        kind: TokenKind::QuotationMark(QuotationMark::Backtick),
117                        span: (e as u32, (e + 1) as u32),
118                        value: QuotationMark::Backtick.into(),
119                    });
120
121                    break;
122                }
123                ('$', false) => {
124                    in_expression = self.cursor.next_if_is("{");
125                    if in_expression {
126                        self.tokens.push(Token {
127                            kind: TokenKind::Literal,
128                            span: (str_start as u32, e as u32),
129                            value: &self.source[str_start..e],
130                        });
131
132                        self.tokens.push(Token {
133                            kind: TokenKind::TemplateString(TemplateString::ExpressionStart),
134                            span: (e as u32, (e + 2) as u32),
135                            value: TemplateString::ExpressionStart.into(),
136                        });
137                    }
138                }
139                ('}', true) => {
140                    in_expression = false;
141                    self.tokens.push(Token {
142                        kind: TokenKind::TemplateString(TemplateString::ExpressionEnd),
143                        span: (str_start as u32, e as u32),
144                        value: TemplateString::ExpressionEnd.into(),
145                    });
146
147                    str_start = e + 1;
148                }
149                (_, false) => {
150                    // Continue reading string
151                }
152                (_, true) => {
153                    self.cursor.back();
154                    self.scan_cursor_item((e, c))?;
155                }
156            }
157        }
158
159        Ok(())
160    }
161
162    fn string(&mut self, quote_kind: QuotationMark) -> LexerResult<()> {
163        let (start, opener) = self.next()?;
164        let end: usize;
165
166        loop {
167            let (e, c) = self.next()?;
168            if c == opener {
169                end = e;
170                break;
171            }
172        }
173
174        self.push(Token {
175            kind: TokenKind::QuotationMark(quote_kind),
176            span: (start as u32, (start + 1) as u32),
177            value: quote_kind.into(),
178        });
179
180        self.push(Token {
181            kind: TokenKind::Literal,
182            span: ((start + 1) as u32, end as u32),
183            value: &self.source[start + 1..end],
184        });
185
186        self.push(Token {
187            kind: TokenKind::QuotationMark(quote_kind),
188            span: (end as u32, (end + 1) as u32),
189            value: quote_kind.into(),
190        });
191
192        Ok(())
193    }
194
195    fn number(&mut self) -> LexerResult<()> {
196        let (start, _) = self.next()?;
197        let mut end = start;
198        let mut fractal = false;
199
200        while let Some((e, c)) = self
201            .cursor
202            .next_if(|c| is_token_type!(c, "digit") || c == '_' || c == '.')
203        {
204            if fractal && c == '.' {
205                self.cursor.back();
206                break;
207            }
208
209            if c == '.' {
210                if let Some((_, p)) = self.cursor.peek() {
211                    if p == '.' {
212                        self.cursor.back();
213                        break;
214                    }
215
216                    fractal = true
217                }
218            }
219
220            end = e;
221        }
222
223        self.push(Token {
224            kind: TokenKind::Number,
225            span: (start as u32, (end + 1) as u32),
226            value: &self.source[start..=end],
227        });
228
229        Ok(())
230    }
231
232    fn bracket(&mut self) -> LexerResult<()> {
233        let (start, _) = self.next()?;
234
235        let value = &self.source[start..=start];
236        let span = (start as u32, (start + 1) as u32);
237        self.push(Token {
238            kind: TokenKind::Bracket(Bracket::from_str(value).map_err(|_| {
239                LexerError::UnexpectedSymbol {
240                    symbol: value.to_string(),
241                    span,
242                }
243            })?),
244            span,
245            value,
246        });
247
248        Ok(())
249    }
250
251    fn dot(&mut self) -> LexerResult<()> {
252        let (start, _) = self.next()?;
253        let mut end = start;
254
255        if self.cursor.next_if(|c| c == '.').is_some() {
256            end += 1;
257        }
258
259        let value = &self.source[start..=end];
260        let span = (start as u32, (end + 1) as u32);
261        self.push(Token {
262            kind: TokenKind::Operator(Operator::from_str(value).map_err(|_| {
263                LexerError::UnexpectedSymbol {
264                    symbol: value.to_string(),
265                    span,
266                }
267            })?),
268            span,
269            value,
270        });
271
272        Ok(())
273    }
274
275    fn cmp_operator(&mut self) -> LexerResult<()> {
276        let (start, _) = self.next()?;
277        let mut end = start;
278
279        if self.cursor.next_if(|c| c == '=').is_some() {
280            end += 1;
281        }
282
283        let value = &self.source[start..=end];
284        self.push(Token {
285            kind: TokenKind::Operator(Operator::from_str(value).map_err(|_| {
286                LexerError::UnexpectedSymbol {
287                    symbol: value.to_string(),
288                    span: (start as u32, (end + 1) as u32),
289                }
290            })?),
291            span: (start as u32, (end + 1) as u32),
292            value,
293        });
294
295        Ok(())
296    }
297
298    fn question_mark(&mut self) -> LexerResult<()> {
299        let (start, _) = self.next()?;
300        let mut kind = TokenKind::Operator(Operator::QuestionMark);
301        let mut end = start;
302
303        if self.cursor.next_if(|c| c == '?').is_some() {
304            kind = TokenKind::Operator(Operator::Logical(LogicalOperator::NullishCoalescing));
305            end += 1;
306        }
307
308        let value = &self.source[start..=end];
309        self.push(Token {
310            kind,
311            value,
312            span: (start as u32, (end + 1) as u32),
313        });
314
315        Ok(())
316    }
317
318    fn operator(&mut self) -> LexerResult<()> {
319        let (start, _) = self.next()?;
320
321        let value = &self.source[start..=start];
322        let span = (start as u32, (start + 1) as u32);
323        self.push(Token {
324            kind: TokenKind::Operator(Operator::from_str(value).map_err(|_| {
325                LexerError::UnexpectedSymbol {
326                    symbol: value.to_string(),
327                    span,
328                }
329            })?),
330            span,
331            value,
332        });
333
334        Ok(())
335    }
336
337    fn not(&mut self, start: usize) -> LexerResult<()> {
338        if self.cursor.next_if_is(" in ") {
339            let end = self.cursor.position();
340
341            self.push(Token {
342                kind: TokenKind::Operator(Operator::Comparison(ComparisonOperator::NotIn)),
343                span: (start as u32, (end - 1) as u32),
344                value: "not in",
345            })
346        } else {
347            let end = self.cursor.position();
348
349            self.push(Token {
350                kind: TokenKind::Operator(Operator::Logical(LogicalOperator::Not)),
351                span: (start as u32, end as u32),
352                value: "not",
353            })
354        }
355
356        Ok(())
357    }
358
359    fn identifier(&mut self) -> LexerResult<()> {
360        let (start, _) = self.next()?;
361        let mut end = start;
362
363        while let Some((e, _)) = self.cursor.next_if(|c| is_token_type!(c, "alphanumeric")) {
364            end = e;
365        }
366
367        let value = &self.source[start..=end];
368        match value {
369            "and" => self.push(Token {
370                kind: TokenKind::Operator(Operator::Logical(LogicalOperator::And)),
371                span: (start as u32, (end + 1) as u32),
372                value,
373            }),
374            "or" => self.push(Token {
375                kind: TokenKind::Operator(Operator::Logical(LogicalOperator::Or)),
376                span: (start as u32, (end + 1) as u32),
377                value,
378            }),
379            "in" => self.push(Token {
380                kind: TokenKind::Operator(Operator::Comparison(ComparisonOperator::In)),
381                span: (start as u32, (end + 1) as u32),
382                value,
383            }),
384            "true" => self.push(Token {
385                kind: TokenKind::Boolean(true),
386                span: (start as u32, (end + 1) as u32),
387                value,
388            }),
389            "false" => self.push(Token {
390                kind: TokenKind::Boolean(false),
391                span: (start as u32, (end + 1) as u32),
392                value,
393            }),
394            "not" => self.not(start)?,
395            _ => self.push(Token {
396                kind: Identifier::try_from(value)
397                    .map(|identifier| TokenKind::Identifier(identifier))
398                    .unwrap_or(TokenKind::Literal),
399                span: (start as u32, (end + 1) as u32),
400                value,
401            }),
402        }
403
404        Ok(())
405    }
406}