1use crate::lexer::codes::{is_token_type, token_type};
2use crate::lexer::cursor::{Cursor, CursorItem};
3use crate::lexer::error::LexerResult;
4use crate::lexer::token::{
5 Bracket, ComparisonOperator, Identifier, LogicalOperator, Operator, Token, TokenKind,
6};
7use crate::lexer::{LexerError, QuotationMark, TemplateString};
8use std::str::FromStr;
9
10#[derive(Debug, Default)]
11pub struct Lexer<'arena> {
12 tokens: Vec<Token<'arena>>,
13}
14
15impl<'arena> Lexer<'arena> {
16 pub fn new() -> Self {
17 Self::default()
18 }
19
20 pub fn tokenize(&mut self, source: &'arena str) -> LexerResult<&[Token<'arena>]> {
21 self.tokens.clear();
22
23 Scanner::new(source, &mut self.tokens).scan()?;
24 Ok(&self.tokens)
25 }
26}
27
28struct Scanner<'arena, 'self_ref> {
29 cursor: Cursor<'arena>,
30 tokens: &'self_ref mut Vec<Token<'arena>>,
31 source: &'arena str,
32}
33
34impl<'arena, 'self_ref> Scanner<'arena, 'self_ref> {
35 pub fn new(source: &'arena str, tokens: &'self_ref mut Vec<Token<'arena>>) -> Self {
36 Self {
37 cursor: Cursor::from(source),
38 source,
39 tokens,
40 }
41 }
42
43 pub fn scan(&mut self) -> LexerResult<()> {
44 while let Some(cursor_item) = self.cursor.peek() {
45 self.scan_cursor_item(cursor_item)?;
46 }
47
48 Ok(())
49 }
50
51 pub(crate) fn scan_cursor_item(&mut self, cursor_item: CursorItem) -> LexerResult<()> {
52 let (i, s) = cursor_item;
53
54 match s {
55 token_type!("space") => {
56 self.cursor.next();
57 Ok(())
58 }
59 '\'' => self.string(QuotationMark::SingleQuote),
60 '"' => self.string(QuotationMark::DoubleQuote),
61 token_type!("digit") => self.number(),
62 token_type!("bracket") => self.bracket(),
63 token_type!("cmp_operator") => self.cmp_operator(),
64 token_type!("operator") => self.operator(),
65 token_type!("question_mark") => self.question_mark(),
66 '`' => self.template_string(),
67 '.' => self.dot(),
68 token_type!("alpha") => self.identifier(),
69 _ => Err(LexerError::UnmatchedSymbol {
70 symbol: s,
71 position: i as u32,
72 }),
73 }
74 }
75
76 fn next(&self) -> LexerResult<CursorItem> {
77 self.cursor.next().ok_or_else(|| {
78 let (a, b) = self.cursor.peek_back().unwrap_or((0, ' '));
79
80 LexerError::UnexpectedEof {
81 symbol: b,
82 position: a as u32,
83 }
84 })
85 }
86
87 fn push(&mut self, token: Token<'arena>) {
88 self.tokens.push(token);
89 }
90
91 fn template_string(&mut self) -> LexerResult<()> {
92 let (start, _) = self.next()?;
93
94 self.tokens.push(Token {
95 kind: TokenKind::QuotationMark(QuotationMark::Backtick),
96 span: (start as u32, (start + 1) as u32),
97 value: QuotationMark::Backtick.into(),
98 });
99
100 let mut in_expression = false;
101 let mut str_start = start + 1;
102 loop {
103 let (e, c) = self.next()?;
104
105 match (c, in_expression) {
106 ('`', _) => {
107 if str_start < e {
108 self.tokens.push(Token {
109 kind: TokenKind::Literal,
110 span: (str_start as u32, e as u32),
111 value: &self.source[str_start..e],
112 });
113 }
114
115 self.tokens.push(Token {
116 kind: TokenKind::QuotationMark(QuotationMark::Backtick),
117 span: (e as u32, (e + 1) as u32),
118 value: QuotationMark::Backtick.into(),
119 });
120
121 break;
122 }
123 ('$', false) => {
124 in_expression = self.cursor.next_if_is("{");
125 if in_expression {
126 self.tokens.push(Token {
127 kind: TokenKind::Literal,
128 span: (str_start as u32, e as u32),
129 value: &self.source[str_start..e],
130 });
131
132 self.tokens.push(Token {
133 kind: TokenKind::TemplateString(TemplateString::ExpressionStart),
134 span: (e as u32, (e + 2) as u32),
135 value: TemplateString::ExpressionStart.into(),
136 });
137 }
138 }
139 ('}', true) => {
140 in_expression = false;
141 self.tokens.push(Token {
142 kind: TokenKind::TemplateString(TemplateString::ExpressionEnd),
143 span: (str_start as u32, e as u32),
144 value: TemplateString::ExpressionEnd.into(),
145 });
146
147 str_start = e + 1;
148 }
149 (_, false) => {
150 }
152 (_, true) => {
153 self.cursor.back();
154 self.scan_cursor_item((e, c))?;
155 }
156 }
157 }
158
159 Ok(())
160 }
161
162 fn string(&mut self, quote_kind: QuotationMark) -> LexerResult<()> {
163 let (start, opener) = self.next()?;
164 let end: usize;
165
166 loop {
167 let (e, c) = self.next()?;
168 if c == opener {
169 end = e;
170 break;
171 }
172 }
173
174 self.push(Token {
175 kind: TokenKind::QuotationMark(quote_kind),
176 span: (start as u32, (start + 1) as u32),
177 value: quote_kind.into(),
178 });
179
180 self.push(Token {
181 kind: TokenKind::Literal,
182 span: ((start + 1) as u32, end as u32),
183 value: &self.source[start + 1..end],
184 });
185
186 self.push(Token {
187 kind: TokenKind::QuotationMark(quote_kind),
188 span: (end as u32, (end + 1) as u32),
189 value: quote_kind.into(),
190 });
191
192 Ok(())
193 }
194
195 fn number(&mut self) -> LexerResult<()> {
196 let (start, _) = self.next()?;
197 let mut end = start;
198 let mut fractal = false;
199
200 while let Some((e, c)) = self
201 .cursor
202 .next_if(|c| is_token_type!(c, "digit") || c == '_' || c == '.')
203 {
204 if fractal && c == '.' {
205 self.cursor.back();
206 break;
207 }
208
209 if c == '.' {
210 if let Some((_, p)) = self.cursor.peek() {
211 if p == '.' {
212 self.cursor.back();
213 break;
214 }
215
216 fractal = true
217 }
218 }
219
220 end = e;
221 }
222
223 self.push(Token {
224 kind: TokenKind::Number,
225 span: (start as u32, (end + 1) as u32),
226 value: &self.source[start..=end],
227 });
228
229 Ok(())
230 }
231
232 fn bracket(&mut self) -> LexerResult<()> {
233 let (start, _) = self.next()?;
234
235 let value = &self.source[start..=start];
236 let span = (start as u32, (start + 1) as u32);
237 self.push(Token {
238 kind: TokenKind::Bracket(Bracket::from_str(value).map_err(|_| {
239 LexerError::UnexpectedSymbol {
240 symbol: value.to_string(),
241 span,
242 }
243 })?),
244 span,
245 value,
246 });
247
248 Ok(())
249 }
250
251 fn dot(&mut self) -> LexerResult<()> {
252 let (start, _) = self.next()?;
253 let mut end = start;
254
255 if self.cursor.next_if(|c| c == '.').is_some() {
256 end += 1;
257 }
258
259 let value = &self.source[start..=end];
260 let span = (start as u32, (end + 1) as u32);
261 self.push(Token {
262 kind: TokenKind::Operator(Operator::from_str(value).map_err(|_| {
263 LexerError::UnexpectedSymbol {
264 symbol: value.to_string(),
265 span,
266 }
267 })?),
268 span,
269 value,
270 });
271
272 Ok(())
273 }
274
275 fn cmp_operator(&mut self) -> LexerResult<()> {
276 let (start, _) = self.next()?;
277 let mut end = start;
278
279 if self.cursor.next_if(|c| c == '=').is_some() {
280 end += 1;
281 }
282
283 let value = &self.source[start..=end];
284 self.push(Token {
285 kind: TokenKind::Operator(Operator::from_str(value).map_err(|_| {
286 LexerError::UnexpectedSymbol {
287 symbol: value.to_string(),
288 span: (start as u32, (end + 1) as u32),
289 }
290 })?),
291 span: (start as u32, (end + 1) as u32),
292 value,
293 });
294
295 Ok(())
296 }
297
298 fn question_mark(&mut self) -> LexerResult<()> {
299 let (start, _) = self.next()?;
300 let mut kind = TokenKind::Operator(Operator::QuestionMark);
301 let mut end = start;
302
303 if self.cursor.next_if(|c| c == '?').is_some() {
304 kind = TokenKind::Operator(Operator::Logical(LogicalOperator::NullishCoalescing));
305 end += 1;
306 }
307
308 let value = &self.source[start..=end];
309 self.push(Token {
310 kind,
311 value,
312 span: (start as u32, (end + 1) as u32),
313 });
314
315 Ok(())
316 }
317
318 fn operator(&mut self) -> LexerResult<()> {
319 let (start, _) = self.next()?;
320
321 let value = &self.source[start..=start];
322 let span = (start as u32, (start + 1) as u32);
323 self.push(Token {
324 kind: TokenKind::Operator(Operator::from_str(value).map_err(|_| {
325 LexerError::UnexpectedSymbol {
326 symbol: value.to_string(),
327 span,
328 }
329 })?),
330 span,
331 value,
332 });
333
334 Ok(())
335 }
336
337 fn not(&mut self, start: usize) -> LexerResult<()> {
338 if self.cursor.next_if_is(" in ") {
339 let end = self.cursor.position();
340
341 self.push(Token {
342 kind: TokenKind::Operator(Operator::Comparison(ComparisonOperator::NotIn)),
343 span: (start as u32, (end - 1) as u32),
344 value: "not in",
345 })
346 } else {
347 let end = self.cursor.position();
348
349 self.push(Token {
350 kind: TokenKind::Operator(Operator::Logical(LogicalOperator::Not)),
351 span: (start as u32, end as u32),
352 value: "not",
353 })
354 }
355
356 Ok(())
357 }
358
359 fn identifier(&mut self) -> LexerResult<()> {
360 let (start, _) = self.next()?;
361 let mut end = start;
362
363 while let Some((e, _)) = self.cursor.next_if(|c| is_token_type!(c, "alphanumeric")) {
364 end = e;
365 }
366
367 let value = &self.source[start..=end];
368 match value {
369 "and" => self.push(Token {
370 kind: TokenKind::Operator(Operator::Logical(LogicalOperator::And)),
371 span: (start as u32, (end + 1) as u32),
372 value,
373 }),
374 "or" => self.push(Token {
375 kind: TokenKind::Operator(Operator::Logical(LogicalOperator::Or)),
376 span: (start as u32, (end + 1) as u32),
377 value,
378 }),
379 "in" => self.push(Token {
380 kind: TokenKind::Operator(Operator::Comparison(ComparisonOperator::In)),
381 span: (start as u32, (end + 1) as u32),
382 value,
383 }),
384 "true" => self.push(Token {
385 kind: TokenKind::Boolean(true),
386 span: (start as u32, (end + 1) as u32),
387 value,
388 }),
389 "false" => self.push(Token {
390 kind: TokenKind::Boolean(false),
391 span: (start as u32, (end + 1) as u32),
392 value,
393 }),
394 "not" => self.not(start)?,
395 _ => self.push(Token {
396 kind: Identifier::try_from(value)
397 .map(|identifier| TokenKind::Identifier(identifier))
398 .unwrap_or(TokenKind::Literal),
399 span: (start as u32, (end + 1) as u32),
400 value,
401 }),
402 }
403
404 Ok(())
405 }
406}