1use crate::{keywords::Keyword, Span};
14
15#[derive(Debug, Clone, PartialEq, Eq, Hash)]
17pub(crate) enum Token<'a> {
18 Ampersand,
19 At,
20 Backslash,
21 Caret,
22 Colon,
23 Comma,
24 Div,
25 DoubleColon,
26 DoubleExclamationMark,
27 DoubleAmpersand,
28 DoublePipe,
29 DoubleDollar,
30 Eq,
31 ExclamationMark,
32 Float(&'a str),
33 Gt,
34 GtEq,
35 Ident(&'a str, Keyword),
36 Integer(&'a str),
37 Invalid,
38 LBrace,
39 LBracket,
40 LParen,
41 Lt,
42 LtEq,
43 Minus,
44 Mod,
45 Mul,
46 Neq,
47 Period,
48 Pipe,
49 Plus,
50 QuestionMark,
51 RArrow,
52 RBrace,
53 RBracket,
54 RParen,
55 SemiColon,
56 Sharp,
57 ShiftLeft,
58 ShiftRight,
59 SingleQuotedString(&'a str),
60 DoubleQuotedString(&'a str),
61 Spaceship,
62 Tilde,
63 PercentS,
64 DollarArg(usize),
65 AtAtGlobal,
66 AtAtSession,
67 Eof,
68}
69
70impl<'a> Token<'a> {
71 pub(crate) fn name(&self) -> &'static str {
72 match self {
73 Token::Ampersand => "'&'",
74 Token::At => "'@'",
75 Token::Backslash => "'\\'",
76 Token::Caret => "'^'",
77 Token::Colon => "':'",
78 Token::Comma => "','",
79 Token::Div => "'/'",
80 Token::DoubleColon => "'::'",
81 Token::DoubleExclamationMark => "'!!'",
82 Token::DoublePipe => "'||'",
83 Token::DoubleAmpersand => "'&&'",
84 Token::Eq => "'='",
85 Token::ExclamationMark => "'!'",
86 Token::Float(_) => "Float",
87 Token::Gt => "'>'",
88 Token::GtEq => "'>='",
89 Token::Ident(_, Keyword::NOT_A_KEYWORD) => "Identifier",
90 Token::Ident(_, Keyword::QUOTED_IDENTIFIER) => "QuotedIdentifier",
91 Token::Ident(_, kw) => kw.name(),
92 Token::Integer(_) => "Integer",
93 Token::Invalid => "Invalid",
94 Token::LBrace => "'{'",
95 Token::LBracket => "'['",
96 Token::LParen => "'('",
97 Token::Lt => "'<'",
98 Token::LtEq => "'<='",
99 Token::Minus => "'-'",
100 Token::Mod => "'%'",
101 Token::Mul => "'*'",
102 Token::Neq => "'!='",
103 Token::Period => "'.'",
104 Token::Pipe => "'|'",
105 Token::Plus => "'+'",
106 Token::QuestionMark => "'?'",
107 Token::RArrow => "'=>'",
108 Token::RBrace => "'}'",
109 Token::RBracket => "']'",
110 Token::RParen => "')'",
111 Token::SemiColon => "';'",
112 Token::Sharp => "'#'",
113 Token::ShiftLeft => "'>>'",
114 Token::ShiftRight => "'<<'",
115 Token::DoubleDollar => "'$$'",
116 Token::DollarArg(v) if *v == 1 => "'$1'",
117 Token::DollarArg(v) if *v == 2 => "'$2'",
118 Token::DollarArg(v) if *v == 3 => "'$3'",
119 Token::DollarArg(v) if *v == 4 => "'$4'",
120 Token::DollarArg(v) if *v == 5 => "'$5'",
121 Token::DollarArg(v) if *v == 6 => "'$6'",
122 Token::DollarArg(v) if *v == 7 => "'$7'",
123 Token::DollarArg(v) if *v == 8 => "'$8'",
124 Token::DollarArg(v) if *v == 9 => "'$9'",
125 Token::DollarArg(_) => "'$i'",
126 Token::SingleQuotedString(_) => "String",
127 Token::DoubleQuotedString(_) => "String",
128 Token::Spaceship => "'<=>'",
129 Token::Tilde => "'~'",
130 Token::PercentS => "'%s'",
131 Token::AtAtGlobal => "@@GLOBAL",
132 Token::AtAtSession => "@@SESSION",
133 Token::Eof => "EndOfFile",
134 }
135 }
136}
/// Lexer state over a borrowed source string.
pub(crate) struct Lexer<'a> {
    /// The complete text being lexed; token text is sliced out of it.
    src: &'a str,
    /// Cursor over `src` yielding `(byte offset, char)` pairs with one
    /// character of lookahead.
    chars: core::iter::Peekable<core::str::CharIndices<'a>>,
}
141
142impl<'a> Lexer<'a> {
143 pub fn new(src: &'a str) -> Self {
144 Self {
145 src,
146 chars: src.char_indices().peekable(),
147 }
148 }
149
150 pub(crate) fn s(&self, span: Span) -> &'a str {
151 core::str::from_utf8(&self.src.as_bytes()[span]).unwrap()
152 }
153
154 fn simple_literal(&mut self, start: usize) -> Token<'a> {
155 let end = loop {
156 match self.chars.peek() {
157 Some((_, '_' | 'a'..='z' | 'A'..='Z' | '0'..='9')) => {
158 self.chars.next();
159 }
160 Some((i, _)) => break *i,
161 None => break self.src.len(),
162 }
163 };
164 let s = self.s(start..end);
165 let ss = s.to_ascii_uppercase();
166 Token::Ident(s, ss.as_str().into())
167 }
168
169 pub fn read_from_stdin(&mut self) -> (&'a str, Span) {
174 while self
176 .chars
177 .peek()
178 .filter(|(_, c)| *c != '\n' && c.is_ascii_whitespace())
179 .is_some()
180 {
181 self.chars.next().unwrap();
182 }
183 let start = match self.chars.peek() {
184 Some((i, '\n')) => i + 1,
185 Some((i, _)) => *i,
186 None => {
187 let span = self.src.len()..self.src.len();
188 return (self.s(span.clone()), span);
189 }
190 };
191 while let Some((i, c)) = self.chars.next() {
192 if c != '\n' {
193 continue;
194 }
195 if !matches!(self.chars.peek(), Some((_, '\\'))) {
196 continue;
197 }
198 self.chars.next().unwrap();
199 if !matches!(self.chars.peek(), Some((_, '.'))) {
200 continue;
201 }
202 self.chars.next().unwrap();
203 if matches!(self.chars.peek(), Some((_, '\n'))) {
204 self.chars.next().unwrap();
206 } else if self.chars.peek().is_some() {
207 continue;
208 } else {
209 }
212 let span = start..(i + 1);
215 return (self.s(span.clone()), span);
216 }
217 let span = start..self.src.len();
219 return (self.s(span.clone()), span);
220 }
221
222 pub fn next_token(&mut self) -> (Token<'a>, Span) {
223 loop {
224 let (start, c) = match self.chars.next() {
225 Some(v) => v,
226 None => {
227 return (Token::Eof, self.src.len()..self.src.len());
228 }
229 };
230 let t = match c {
231 ' ' | '\t' | '\n' | '\r' => continue,
232 '?' => Token::QuestionMark,
233 ';' => Token::SemiColon,
234 '\\' => Token::Backslash,
235 '[' => Token::LBracket,
236 ']' => Token::RBracket,
237 '&' => match self.chars.peek() {
238 Some((_, '&')) => {
239 self.chars.next();
240 Token::DoubleAmpersand
241 }
242 _ => Token::Ampersand,
243 },
244 '^' => Token::Caret,
245 '{' => Token::LBrace,
246 '}' => Token::RBrace,
247 '(' => Token::LParen,
248 ')' => Token::RParen,
249 ',' => Token::Comma,
250 '+' => Token::Plus,
251 '*' => Token::Mul,
252 '%' => match self.chars.peek() {
253 Some((_, 's')) => {
254 self.chars.next();
255 Token::PercentS
256 }
257 _ => Token::Mod,
258 },
259 '#' => Token::Sharp,
260 '@' => match self.chars.peek() {
261 Some((_, '@')) => {
262 self.chars.next();
263 #[allow(clippy::never_loop)]
264 match self.chars.peek() {
265 Some((_, 's' | 'S')) => loop {
266 self.chars.next();
267 if !matches!(self.chars.peek(), Some((_, 'e' | 'E'))) {
268 break Token::Invalid;
269 }
270 self.chars.next();
271 if !matches!(self.chars.peek(), Some((_, 's' | 'S'))) {
272 break Token::Invalid;
273 }
274 self.chars.next();
275 if !matches!(self.chars.peek(), Some((_, 's' | 'S'))) {
276 break Token::Invalid;
277 }
278 self.chars.next();
279 if !matches!(self.chars.peek(), Some((_, 'i' | 'I'))) {
280 break Token::Invalid;
281 }
282 self.chars.next();
283 if !matches!(self.chars.peek(), Some((_, 'o' | 'O'))) {
284 break Token::Invalid;
285 }
286 self.chars.next();
287 if !matches!(self.chars.peek(), Some((_, 'n' | 'N'))) {
288 break Token::Invalid;
289 }
290 self.chars.next();
291 break Token::AtAtSession;
292 },
293 Some((_, 'g' | 'G')) => loop {
294 self.chars.next();
295 if !matches!(self.chars.peek(), Some((_, 'l' | 'L'))) {
296 break Token::Invalid;
297 }
298 self.chars.next();
299 if !matches!(self.chars.peek(), Some((_, 'o' | 'O'))) {
300 break Token::Invalid;
301 }
302 self.chars.next();
303 if !matches!(self.chars.peek(), Some((_, 'b' | 'B'))) {
304 break Token::Invalid;
305 }
306 self.chars.next();
307 if !matches!(self.chars.peek(), Some((_, 'a' | 'A'))) {
308 break Token::Invalid;
309 }
310 self.chars.next();
311 if !matches!(self.chars.peek(), Some((_, 'l' | 'L'))) {
312 break Token::Invalid;
313 }
314 self.chars.next();
315 break Token::AtAtGlobal;
316 },
317 _ => Token::Invalid,
318 }
319 }
320 _ => Token::At,
321 },
322 '~' => Token::Tilde,
323 ':' => match self.chars.peek() {
324 Some((_, ':')) => {
325 self.chars.next();
326 Token::DoubleColon
327 }
328 _ => Token::Colon,
329 },
330 '$' => match self.chars.peek() {
331 Some((_, '$')) => {
332 self.chars.next();
333 Token::DoubleDollar
334 }
335 Some((_, '1'..='9')) => {
336 let mut v = self.chars.peek().unwrap().1.to_digit(10).unwrap() as usize;
337 self.chars.next();
338 while matches!(self.chars.peek(), Some((_, '0'..='9'))) {
339 v = v * 10
340 + self.chars.peek().unwrap().1.to_digit(10).unwrap() as usize;
341 self.chars.next();
342 }
343 Token::DollarArg(v)
344 }
345 _ => Token::Invalid,
346 },
347 '=' => match self.chars.peek() {
348 Some((_, '>')) => {
349 self.chars.next();
350 Token::RArrow
351 }
352 _ => Token::Eq,
353 },
354 '!' => match self.chars.peek() {
355 Some((_, '=')) => {
356 self.chars.next();
357 Token::Neq
358 }
359 Some((_, '!')) => {
360 self.chars.next();
361 Token::DoubleExclamationMark
362 }
363 _ => Token::ExclamationMark,
364 },
365 '<' => match self.chars.peek() {
366 Some((_, '=')) => {
367 self.chars.next();
368 match self.chars.peek() {
369 Some((_, '>')) => {
370 self.chars.next();
371 Token::Spaceship
372 }
373 _ => Token::LtEq,
374 }
375 }
376 Some((_, '>')) => {
377 self.chars.next();
378 Token::Neq
379 }
380 Some((_, '<')) => {
381 self.chars.next();
382 Token::ShiftLeft
383 }
384 _ => Token::Lt,
385 },
386 '>' => match self.chars.peek() {
387 Some((_, '=')) => {
388 self.chars.next();
389 Token::GtEq
390 }
391 Some((_, '>')) => {
392 self.chars.next();
393 Token::ShiftRight
394 }
395 _ => Token::Gt,
396 },
397 '|' => match self.chars.peek() {
398 Some((_, '|')) => {
399 self.chars.next();
400 Token::DoublePipe
401 }
402 _ => Token::Pipe,
403 },
404 '-' => match self.chars.peek() {
405 Some((_, '-')) => {
406 while !matches!(self.chars.next(), Some((_, '\r' | '\n')) | None) {}
407 continue;
408 }
409 _ => Token::Minus,
410 },
411 '/' => match self.chars.peek() {
412 Some((_, '*')) => {
413 self.chars.next();
414 let ok = loop {
415 match self.chars.next() {
416 Some((_, '*')) => {
417 if matches!(self.chars.peek(), Some((_, '/'))) {
418 self.chars.next();
419 break true;
420 }
421 }
422 Some(_) => (),
423 None => break false,
424 }
425 };
426 if ok {
427 continue;
428 } else {
429 Token::Invalid
430 }
431 }
432 Some((_, '/')) => {
433 while !matches!(self.chars.next(), Some((_, '\r' | '\n')) | None) {}
434 continue;
435 }
436 _ => Token::Div,
437 },
438 'x' | 'X' => match self.chars.peek() {
439 Some((_, '\'')) => {
440 todo!("Hex literal")
441 }
442 _ => self.simple_literal(start),
443 },
444 '_' | 'a'..='z' | 'A'..='Z' => self.simple_literal(start),
445 '`' => {
446 while matches!(
447 self.chars.peek(),
448 Some((_, '_' | 'a'..='z' | 'A'..='Z' | '0'..='9' | '-'))
449 ) {
450 self.chars.next();
451 }
452 match self.chars.peek() {
453 Some((i, '`')) => {
454 let i = *i;
455 self.chars.next();
456 Token::Ident(self.s(start + 1..i), Keyword::QUOTED_IDENTIFIER)
457 }
458 _ => Token::Invalid,
459 }
460 }
461 '\'' => loop {
462 match self.chars.next() {
463 Some((_, '\\')) => {
464 self.chars.next();
465 }
466 Some((i, '\'')) => match self.chars.peek() {
467 Some((_, '\'')) => {
468 self.chars.next();
469 }
470 _ => break Token::SingleQuotedString(self.s(start + 1..i)),
471 },
472 Some((_, _)) => (),
473 None => break Token::Invalid,
474 }
475 },
476 '"' => loop {
477 match self.chars.next() {
478 Some((_, '\\')) => {
479 self.chars.next();
480 }
481 Some((i, '"')) => match self.chars.peek() {
482 Some((_, '"')) => {
483 self.chars.next();
484 }
485 _ => break Token::DoubleQuotedString(self.s(start + 1..i)),
486 },
487 Some((_, _)) => (),
488 None => break Token::Invalid,
489 }
490 },
491 '0'..='9' => loop {
492 match self.chars.peek() {
493 Some((_, '0'..='9')) => {
494 self.chars.next();
495 }
496 Some((_, '.')) => {
497 self.chars.next();
498 break loop {
499 match self.chars.peek() {
500 Some((_, '0'..='9')) => {
501 self.chars.next();
502 }
503 Some((i, _)) => {
504 let i = *i;
505 break Token::Float(self.s(start..i));
506 }
507 None => break Token::Float(self.s(start..self.src.len())),
508 }
509 };
510 }
511 Some((i, _)) => {
512 let i = *i;
513 break Token::Integer(self.s(start..i));
514 }
515 None => break Token::Integer(self.s(start..self.src.len())),
516 }
517 },
518 '.' => match self.chars.peek() {
519 Some((_, '0'..='9')) => loop {
520 match self.chars.peek() {
521 Some((_, '0'..='9')) => {
522 self.chars.next();
523 }
524 Some((i, _)) => {
525 let i = *i;
526 break Token::Float(self.s(start..i));
527 }
528 None => break Token::Float(self.s(start..self.src.len())),
529 }
530 },
531 _ => Token::Period,
532 },
533 _ => Token::Invalid,
534 };
535
536 let end = match self.chars.peek() {
537 Some((i, _)) => *i,
538 None => self.src.len(),
539 };
540 return (t, start..end);
541
542 }
587 }
588}
589
590impl<'a> Iterator for Lexer<'a> {
591 type Item = (Token<'a>, Span);
592
593 fn next(&mut self) -> Option<Self::Item> {
594 Some(self.next_token())
595 }
596}