1#![allow(clippy::or_fun_call)]
17
18#[macro_use]
19mod token;
20mod state;
21mod tests;
22
23
24#[rustfmt::skip]
25mod tables;
26
27pub use token::Token;
28
29#[cfg(feature = "highlight")]
30mod highlight;
31#[cfg(feature = "highlight")]
32pub use highlight::*;
33
34use rslint_errors::Diagnostic;
35use state::LexerState;
36use tables::derived_property::*;
37
38pub use rslint_syntax::*;
39pub type LexerReturn = (Token, Option<Diagnostic>);
40
/// Manually unrolls a loop body.
///
/// The given statements are emitted five times in a row, followed by an
/// endless `loop` whose body again contains the statements five times.
/// The macro adds no exit condition of its own: the statements must leave
/// the enclosing function or loop themselves (e.g. with `return`).
macro_rules! unwind_loop {
    ($($iter:tt)*) => {
        $($iter)*
        $($iter)*
        $($iter)*
        $($iter)*
        $($iter)*

        loop {
            $($iter)*
            $($iter)*
            $($iter)*
            $($iter)*
            $($iter)*
        }
    };
}
59
/// Lead bytes that are checked before a multi-byte character is decoded and
/// tested for being whitespace (see `consume_whitespace` and the `UNI` arm of
/// `lex_token`).
// NOTE(review): presumably these are the first UTF-8 bytes of the characters in
// `UNICODE_SPACES` plus the Unicode line separators -- confirm before editing.
const UNICODE_WHITESPACE_STARTS: [u8; 5] = [
    0xC2, 0xEF, 0xE1, 0xE2, 0xE3,
];
69
/// Characters that the lexer treats as (non line-breaking) whitespace once a
/// multi-byte character has been decoded.
const UNICODE_SPACES: [char; 19] = [
    '\u{0020}', '\u{00A0}', '\u{1680}', '\u{2000}', '\u{2001}', '\u{2002}', '\u{2003}', '\u{2004}',
    '\u{2005}', '\u{2006}', '\u{2007}', '\u{2008}', '\u{2009}', '\u{200A}', '\u{200B}', '\u{202F}',
    '\u{205F}', '\u{3000}', '\u{FEFF}',
];
76
77fn is_id_start(c: char) -> bool {
78 c == '_' || c == '$' || ID_Start(c)
79}
80
81fn is_id_continue(c: char) -> bool {
82 c == '$' || c == '\u{200d}' || c == '\u{200c}' || ID_Continue(c)
83}
84
/// A byte-oriented lexer over a source text.
///
/// Tokens are produced through the `Iterator` implementation, which yields
/// `(Token, Option<Diagnostic>)` pairs followed by a single `EOF` token.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Lexer<'src> {
    // The source being lexed; characters are decoded from it without
    // re-validating UTF-8 (see `get_unicode_char`).
    bytes: &'src [u8],
    // Byte offset of the current position in `bytes`.
    cur: usize,
    // Lexer state (e.g. `had_linebreak`, `expr_allowed`, template nesting).
    state: LexerState,
    // Identifier of the file, attached to every diagnostic.
    pub file_id: usize,
    // Whether the final `EOF` token has already been yielded.
    returned_eof: bool,
}
94
95impl<'src> Lexer<'src> {
96 pub unsafe fn from_bytes(bytes: &'src [u8], file_id: usize) -> Self {
101 Self {
102 bytes,
103 cur: 0,
104 file_id,
105 state: LexerState::new(),
106 returned_eof: false,
107 }
108 }
109
110 pub fn from_str(string: &'src str, file_id: usize) -> Self {
112 Self {
113 bytes: string.as_bytes(),
114 cur: 0,
115 file_id,
116 state: LexerState::new(),
117 returned_eof: false,
118 }
119 }
120
121 fn eat(&mut self, tok: LexerReturn) -> LexerReturn {
123 self.next();
124 tok
125 }
126
    /// Advances the cursor over a run of whitespace, starting with the byte
    /// *after* the current one, and records any line break that is seen in
    /// `self.state.had_linebreak`.
    ///
    /// Returns with the cursor on the first byte that is not whitespace (or
    /// past the end of the input).
    fn consume_whitespace(&mut self) {
        unwind_loop! {
            if let Some(byte) = self.next().copied() {
                if DISPATCHER[byte as usize] != Dispatch::WHS {
                    // Not ASCII whitespace: only decode a full character if the
                    // lead byte can start one of the whitespace characters.
                    if byte > 0xC1 && UNICODE_WHITESPACE_STARTS.contains(&byte) {
                        let chr = self.get_unicode_char();
                        if is_linebreak(chr) {
                            self.state.had_linebreak = true;
                        }
                        // A character outside `UNICODE_SPACES` ends the run; the
                        // cursor stays on its first byte.
                        if !UNICODE_SPACES.contains(&chr) {
                            return;
                        }
                        // Move to the last byte of the character; the next
                        // iteration's `next()` steps past it.
                        self.cur += chr.len_utf8() - 1;
                    } else {
                        return;
                    }
                }
                if is_linebreak(byte as char) {
                    self.state.had_linebreak = true;
                }
            } else {
                return;
            }
        }
    }
155
156 fn get_unicode_char(&self) -> char {
158 debug_assert!(self.cur < self.bytes.len());
160
161 let string = unsafe { std::str::from_utf8_unchecked(self.bytes.get_unchecked(self.cur..)) };
163 let chr = if let Some(chr) = string.chars().next() {
164 chr
165 } else {
166 unsafe {
168 core::hint::unreachable_unchecked();
169 }
170 };
171
172 chr
173 }
174
175 #[inline]
177 fn next(&mut self) -> Option<&u8> {
178 self.cur += 1;
179 self.bytes.get(self.cur)
180 }
181
182 #[inline]
185 fn next_bounded(&mut self) -> Option<&u8> {
186 if let Some(b) = self.bytes.get(self.cur + 1) {
187 self.cur += 1;
188 Some(b)
189 } else {
190 if self.cur != self.bytes.len() {
191 self.cur += 1;
192 }
193 None
194 }
195 }
196
197 fn advance(&mut self, amount: usize) {
198 self.cur += amount;
199 }
200
201 fn lookup(byte: u8) -> Dispatch {
202 unsafe { *DISPATCHER.get_unchecked(byte as usize) }
204 }
205
206 fn read_codepoint_escape(&mut self) -> Result<char, Diagnostic> {
208 let start = self.cur + 1;
209 self.read_hexnumber();
210
211 if self.bytes.get(self.cur) != Some(&b'}') {
212 let invalid = self.get_unicode_char();
215 let err = Diagnostic::error(self.file_id, "", "expected hex digits for a unicode code point escape, but encountered an invalid character")
216 .primary(self.cur .. invalid.len_utf8(), "");
217
218 self.cur -= 1;
219 return Err(err);
220 }
221
222 let digits_str = unsafe {
228 debug_assert!(self.bytes.get(start..self.cur).is_some());
229 debug_assert!(std::str::from_utf8(self.bytes.get_unchecked(start..self.cur)).is_ok());
230
231 std::str::from_utf8_unchecked(self.bytes.get_unchecked(start..self.cur))
232 };
233
234 match u32::from_str_radix(digits_str, 16) {
235 Ok(digits) if digits <= 0x10FFFF => {
236 let res = std::char::from_u32(digits);
237 if let Some(chr) = res {
238 Ok(chr)
239 } else {
240 let err =
241 Diagnostic::error(self.file_id, "", "invalid codepoint for unicode escape")
242 .primary(start..self.cur, "");
243
244 Err(err)
245 }
246 }
247
248 _ => {
249 let err = Diagnostic::error(
250 self.file_id,
251 "",
252 "out of bounds codepoint for unicode codepoint escape sequence",
253 )
254 .primary(start..self.cur, "")
255 .footer_note("Codepoints range from 0 to 0x10FFFF (1114111)");
256
257 Err(err)
258 }
259 }
260 }
261
262 fn read_unicode_escape(&mut self, advance: bool) -> Result<char, Diagnostic> {
265 debug_assert_eq!(self.bytes[self.cur], b'u');
266
267 let diagnostic = Diagnostic::error(
268 self.file_id,
269 "",
270 "invalid digits after unicode escape sequence",
271 )
272 .primary(
273 self.cur - 1..self.cur + 1,
274 "expected 4 hex digits following this",
275 );
276
277 for idx in 0..4 {
278 match self.next_bounded() {
279 None => {
280 if !advance {
281 self.cur -= idx + 1;
282 }
283 return Err(diagnostic);
284 }
285 Some(b) if !b.is_ascii_hexdigit() => {
286 if !advance {
287 self.cur -= idx + 1;
288 }
289 return Err(diagnostic);
290 }
291 _ => {}
292 }
293 }
294
295 unsafe {
296 let digits_str = std::str::from_utf8_unchecked(
298 self.bytes.get_unchecked((self.cur - 3)..(self.cur + 1)),
299 );
300 if let Ok(digits) = u32::from_str_radix(digits_str, 16) {
301 if !advance {
302 self.cur -= 4;
303 }
304 Ok(std::char::from_u32_unchecked(digits))
306 } else {
307 core::hint::unreachable_unchecked();
310 }
311 }
312 }
313
314 fn validate_hex_escape(&mut self) -> Option<Diagnostic> {
317 debug_assert_eq!(self.bytes[self.cur], b'x');
318
319 let diagnostic =
320 Diagnostic::error(self.file_id, "", "invalid digits after hex escape sequence")
321 .primary(
322 (self.cur - 1)..(self.cur + 1),
323 "Expected 2 hex digits following this",
324 );
325
326 for _ in 0..2 {
327 match self.next_bounded() {
328 None => return Some(diagnostic),
329 Some(b) if !(*b as u8).is_ascii_hexdigit() => return Some(diagnostic),
330 _ => {}
331 }
332 }
333 None
334 }
335
    /// Validates the escape sequence that starts at the backslash under the
    /// cursor and moves the cursor into/over it.
    ///
    /// Returns a diagnostic for malformed `\u{...}`, `\uXXXX` and `\xXX`
    /// escapes, or when nothing follows the backslash.
    fn validate_escape_sequence(&mut self) -> Option<Diagnostic> {
        let cur = self.cur;
        if let Some(escape) = self.bytes.get(self.cur + 1) {
            match escape {
                b'u' if self.bytes.get(self.cur + 2) == Some(&b'{') => {
                    // Skip `\u` so the cursor sits on the `{`.
                    self.advance(2);
                    self.read_codepoint_escape().err()
                }
                b'u' => {
                    self.next();
                    self.read_unicode_escape(true).err()
                }
                b'x' => {
                    self.next();
                    self.validate_hex_escape()
                }
                _ => {
                    // The cursor has not moved in this arm, so the character
                    // decoded here is the one at the backslash position;
                    // adding its length moves the cursor onto the escaped
                    // character.
                    let chr = self.get_unicode_char();
                    self.cur += chr.len_utf8();
                    None
                }
            }
        } else {
            Some(Diagnostic::error(self.file_id, "", "").primary(
                cur..cur + 1,
                "expected an escape sequence following a backslash, but found none",
            ))
        }
    }
367
368 #[inline]
370 fn consume_ident(&mut self) {
371 unwind_loop! {
372 if self.next_bounded().is_some() {
373 if self.cur_ident_part().is_none() {
374 return;
375 }
376 } else {
377 return;
378 }
379 }
380 }
381
382 #[inline]
389 fn consume_and_get_ident(&mut self, buf: &mut [u8]) -> usize {
390 let mut idx = 0;
391
392 unwind_loop! {
393 if self.next_bounded().is_some() {
394 if let Some(c) = self.cur_ident_part() {
395 if let Some(buf) = buf.get_mut(idx..idx + 4) {
396 let res = c.encode_utf8(buf);
397 idx += res.len();
398 }
399 } else {
400 return idx;
401 }
402 } else {
403 return idx;
404 }
405 }
406 }
407
408 fn read_str_literal(&mut self) -> Option<Diagnostic> {
411 let quote = unsafe { *self.bytes.get_unchecked(self.cur) };
413 let start = self.cur;
414 let mut diagnostic = None;
415
416 while let Some(byte) = self.next_bounded() {
417 match *byte {
418 b'\\' => {
419 diagnostic = self.validate_escape_sequence();
420 }
421 b if b == quote => {
422 self.next();
423 return diagnostic;
424 }
425 _ => {}
426 }
427 }
428
429 let unterminated = Diagnostic::error(self.file_id, "", "unterminated string literal")
430 .primary(self.cur..self.cur, "input ends here")
431 .secondary(start..start + 1, "string literal starts here");
432
433 Some(unterminated)
434 }
435
    /// Checks whether an identifier part starts at the cursor and, if so,
    /// returns the character it stands for.
    ///
    /// For multi-byte characters and unicode escapes the cursor is moved so
    /// that it rests on the last byte of the part. When no identifier part is
    /// found, `None` is returned.
    #[inline]
    fn cur_ident_part(&mut self) -> Option<char> {
        debug_assert!(self.cur < self.bytes.len());

        // SAFETY (relies on caller): `self.cur` must be in bounds; this is only
        // asserted in debug builds.
        let b = unsafe { self.bytes.get_unchecked(self.cur) };

        match Self::lookup(*b) {
            IDT | DIG | ZER => Some(*b as char),
            UNI => {
                let chr = self.get_unicode_char();
                let res = is_id_continue(chr);
                if res {
                    // Rest on the last byte of the character.
                    self.cur += chr.len_utf8() - 1;
                    Some(chr)
                } else {
                    None
                }
            }
            BSL if self.bytes.get(self.cur + 1) == Some(&b'u') => {
                let start = self.cur;
                // Step onto the `u`.
                self.next();
                let res = if self.bytes.get(self.cur + 1).copied() == Some(b'{') {
                    // Step onto the `{`, as `read_codepoint_escape` expects.
                    self.next();
                    self.read_codepoint_escape()
                } else {
                    self.read_unicode_escape(true)
                };

                if let Ok(c) = res {
                    if is_id_continue(c) {
                        Some(c)
                    } else {
                        self.cur -= 1;
                        None
                    }
                } else {
                    // Malformed escape: rewind to the backslash.
                    self.cur = start;
                    None
                }
            }
            _ => None,
        }
    }
486
    /// Checks whether an identifier start begins at the cursor.
    ///
    /// Handles ASCII identifier starts, multi-byte characters and four-digit
    /// `\uXXXX` escapes. On a match involving several bytes the cursor is
    /// moved forward (see the individual arms).
    #[inline]
    fn cur_is_ident_start(&mut self) -> bool {
        debug_assert!(self.cur < self.bytes.len());

        // SAFETY (relies on caller): `self.cur` must be in bounds; this is only
        // asserted in debug builds.
        let b = unsafe { self.bytes.get_unchecked(self.cur) };

        match Self::lookup(*b) {
            BSL if self.bytes.get(self.cur + 1) == Some(&b'u') => {
                // Step onto the `u`; `read_unicode_escape(false)` leaves the
                // cursor there.
                self.next();
                if let Ok(chr) = self.read_unicode_escape(false) {
                    if is_id_start(chr) {
                        // Skip the `u` and the four digits.
                        self.advance(5);
                        return true;
                    }
                }
                // Not an identifier start: go back to the backslash.
                self.cur -= 1;
                false
            }
            UNI => {
                let chr = self.get_unicode_char();
                if is_id_start(chr) {
                    // Rest on the last byte of the character.
                    self.cur += chr.len_utf8() - 1;
                    true
                } else {
                    false
                }
            }
            IDT => true,
            _ => false,
        }
    }
521
522 #[inline]
528 fn resolve_identifier(&mut self, first: (char, usize)) -> LexerReturn {
529 use SyntaxKind::*;
530
531 let mut buf = [0u8; 16];
534 let (len, start) = (first.0.encode_utf8(&mut buf).len(), first.1);
535
536 let count = self.consume_and_get_ident(&mut buf[len..]);
537
538 let kind = match &buf[..count + len] {
539 b"await" => Some(AWAIT_KW),
540 b"break" => Some(BREAK_KW),
541 b"case" => Some(CASE_KW),
542 b"catch" => Some(CATCH_KW),
543 b"class" => Some(CLASS_KW),
544 b"const" => Some(CONST_KW),
545 b"continue" => Some(CONTINUE_KW),
546 b"debugger" => Some(DEBUGGER_KW),
547 b"default" => Some(DEFAULT_KW),
548 b"delete" => Some(DELETE_KW),
549 b"do" => Some(DO_KW),
550 b"else" => Some(ELSE_KW),
551 b"enum" => Some(ENUM_KW),
552 b"export" => Some(EXPORT_KW),
553 b"extends" => Some(EXTENDS_KW),
554 b"false" => Some(FALSE_KW),
555 b"finally" => Some(FINALLY_KW),
556 b"for" => Some(FOR_KW),
557 b"function" => Some(FUNCTION_KW),
558 b"if" => Some(IF_KW),
559 b"in" => Some(IN_KW),
560 b"import" => Some(IMPORT_KW),
561 b"instanceof" => Some(INSTANCEOF_KW),
562 b"new" => Some(NEW_KW),
563 b"null" => Some(NULL_KW),
564 b"return" => Some(RETURN_KW),
565 b"super" => Some(SUPER_KW),
566 b"switch" => Some(SWITCH_KW),
567 b"this" => Some(THIS_KW),
568 b"throw" => Some(THROW_KW),
569 b"try" => Some(TRY_KW),
570 b"true" => Some(TRUE_KW),
571 b"typeof" => Some(TYPEOF_KW),
572 b"var" => Some(VAR_KW),
573 b"void" => Some(VOID_KW),
574 b"while" => Some(WHILE_KW),
575 b"with" => Some(WITH_KW),
576 b"yield" => Some(YIELD_KW),
577 _ => None,
578 };
579
580 if let Some(kind) = kind {
581 (Token::new(kind, self.cur - start), None)
582 } else {
583 (Token::new(T![ident], self.cur - start), None)
584 }
585 }
586
587 #[inline]
588 fn special_number_start<F: Fn(char) -> bool>(&mut self, func: F) -> bool {
589 if self
590 .bytes
591 .get(self.cur + 2)
592 .map(|b| func(*b as char))
593 .unwrap_or(false)
594 {
595 self.cur += 1;
596 true
597 } else {
598 false
599 }
600 }
601
602 #[inline]
603 fn maybe_bigint(&mut self) {
604 if let Some(b'n') = self.bytes.get(self.cur) {
605 self.next();
606 }
607 }
608
609 #[inline]
610 fn read_zero(&mut self) -> Option<Diagnostic> {
611 match self.bytes.get(self.cur + 1) {
613 Some(b'x') | Some(b'X') => {
614 if self.special_number_start(|c| c.is_ascii_hexdigit()) {
615 let diag = self.read_hexnumber();
616 self.maybe_bigint();
617 diag
618 } else {
619 self.next();
620 None
621 }
622 }
623 Some(b'b') | Some(b'B') => {
624 if self.special_number_start(|c| c == '0' || c == '1') {
625 let diag = self.read_bindigits();
626 self.maybe_bigint();
627 diag
628 } else {
629 self.next();
630 None
631 }
632 }
633 Some(b'o') | Some(b'O') => {
634 if self.special_number_start(|c| ('0'..='7').contains(&c)) {
635 let diag = self.read_octaldigits();
636 self.maybe_bigint();
637 diag
638 } else {
639 self.next();
640 None
641 }
642 }
643 Some(b'n') => {
644 self.cur += 2;
645 None
646 }
647 Some(b'.') => {
648 self.cur += 1;
649 self.read_float()
650 }
651 Some(b'e') | Some(b'E') => {
652 match self.bytes.get(self.cur + 2) {
654 Some(b'-') | Some(b'+') => {
655 if let Some(b'0'..=b'9') = self.bytes.get(self.cur + 3) {
656 self.next();
657 self.read_exponent()
658 } else {
659 None
660 }
661 }
662 Some(b'0'..=b'9') => self.read_exponent(),
663 _ => {
664 self.next();
665 None
666 }
667 }
668 }
669 _ => self.read_number(),
672 }
673 }
674
675 #[inline]
676 fn read_hexnumber(&mut self) -> Option<Diagnostic> {
677 let mut diag = None;
678 unwind_loop! {
679 match self.next() {
680 Some(b'_') => diag = diag.or(self.handle_numeric_separator(16)),
681 Some(b) if char::from(*b).is_ascii_hexdigit() => {},
682 _ => return diag,
683 }
684 }
685 }
686
687 #[inline]
688 fn handle_numeric_separator(&mut self, radix: u8) -> Option<Diagnostic> {
689 debug_assert_eq!(self.bytes[self.cur], b'_');
690
691 let err_diag = Diagnostic::error(
692 self.file_id,
693 "",
694 "numeric separators are only allowed between two digits",
695 )
696 .primary(self.cur..self.cur + 1, "");
697
698 let peeked = self.bytes.get(self.cur + 1).copied();
699
700 if peeked.is_none() || !char::from(peeked.unwrap()).is_digit(radix as u32) {
701 return Some(err_diag);
702 }
703
704 let forbidden = |c: Option<u8>| {
705 if c.is_none() {
706 return true;
707 }
708 let c = c.unwrap();
709
710 if radix == 16 {
711 matches!(c, b'.' | b'X' | b'_' | b'x')
712 } else {
713 matches!(c, b'.' | b'B' | b'E' | b'O' | b'_' | b'b' | b'e' | b'o')
714 }
715 };
716
717 let prev = self.bytes.get(self.cur - 1).copied();
718
719 if forbidden(prev) || forbidden(peeked) {
720 return Some(err_diag);
721 }
722
723 self.next_bounded();
724 None
725 }
726
727 #[inline]
730 fn read_number(&mut self) -> Option<Diagnostic> {
731 let mut diag = None;
732 unwind_loop! {
733 match self.next_bounded() {
734 Some(b'_') => diag = diag.or(self.handle_numeric_separator(10)),
735 Some(b'0'..=b'9') => {},
736 Some(b'.') => {
737 return self.read_float();
738 },
739 Some(b'e') | Some(b'E') => {
741 match self.bytes.get(self.cur + 1) {
743 Some(b'-') | Some(b'+') => {
744 if let Some(b'0'..=b'9') = self.bytes.get(self.cur + 2) {
745 self.next();
746 return self.read_exponent();
747 } else {
748 return diag;
749 }
750 },
751 Some(b'0'..=b'9') => return self.read_exponent(),
752 _ => return diag,
753 }
754 },
755 Some(b'n') => {
756 self.next();
757 return diag;
758 }
759 _ => return diag,
760 }
761 }
762 }
763
764 #[inline]
765 fn read_float(&mut self) -> Option<Diagnostic> {
766 let mut diag = None;
767
768 unwind_loop! {
769 match self.next_bounded() {
770 Some(b'_') => diag = diag.or(self.handle_numeric_separator(16)),
771 Some(b'0'..=b'9') => {},
774 Some(b'e') | Some(b'E') => {
775 match self.bytes.get(self.cur + 1) {
777 Some(b'-') | Some(b'+') => {
778 if let Some(b'0'..=b'9') = self.bytes.get(self.cur + 2) {
779 self.next();
780 return self.read_exponent().or(diag);
781 } else {
782 return diag;
783 }
784 },
785 Some(b'0'..=b'9') => return self.read_exponent().or(diag),
786 _ => return diag,
787 }
788 },
789 _ => return diag,
790 }
791 }
792 }
793
794 #[inline]
795 fn read_exponent(&mut self) -> Option<Diagnostic> {
796 if let Some(b'-') | Some(b'+') = self.bytes.get(self.cur + 1) {
797 self.next();
798 }
799
800 let mut diag = None;
801 unwind_loop! {
802 match self.next() {
803 Some(b'_') => diag = diag.or(self.handle_numeric_separator(10)),
804 Some(b'0'..=b'9') => {},
805 _ => return diag,
806 }
807 }
808 }
809
810 #[inline]
811 fn read_bindigits(&mut self) -> Option<Diagnostic> {
812 let mut diag = None;
813 unwind_loop! {
814 match self.next() {
815 Some(b'_') => diag = diag.or(self.handle_numeric_separator(2)),
816 Some(b'0') | Some(b'1') => {},
817 _ => return diag,
818 }
819 }
820 }
821
822 #[inline]
823 fn read_octaldigits(&mut self) -> Option<Diagnostic> {
824 let mut diag = None;
825 unwind_loop! {
826 match self.next() {
827 Some(b'_') => diag = diag.or(self.handle_numeric_separator(8)),
828 Some(b'0'..=b'7') => {},
829 _ => return diag,
830 }
831 }
832 }
833
834 #[inline]
835 fn verify_number_end(&mut self, start: usize) -> LexerReturn {
836 let err_start = self.cur;
837 if self.cur < self.bytes.len() && self.cur_is_ident_start() {
838 self.consume_ident();
839 let err = Diagnostic::error(
840 self.file_id,
841 "",
842 "numbers cannot be followed by identifiers directly after",
843 )
844 .primary(err_start..self.cur, "an identifier cannot appear here");
845
846 (
847 Token::new(SyntaxKind::ERROR_TOKEN, self.cur - start),
848 Some(err),
849 )
850 } else {
851 tok!(NUMBER, self.cur - start)
852 }
853 }
854
855 #[inline]
856 fn read_shebang(&mut self) -> LexerReturn {
857 let start = self.cur;
858 self.next();
859 if start != 0 {
860 return (Token::new(T![#], 1), None);
861 }
862
863 if let Some(b'!') = self.bytes.get(1) {
864 while self.next().is_some() {
865 let chr = self.get_unicode_char();
866
867 if is_linebreak(chr) {
868 return tok!(SHEBANG, self.cur);
869 }
870 self.cur += chr.len_utf8() - 1;
871 }
872 tok!(SHEBANG, self.cur)
873 } else {
874 let err = Diagnostic::error(
875 self.file_id,
876 "",
877 "expected `!` following a `#`, but found none",
878 )
879 .primary(0usize..1usize, "");
880
881 (Token::new(SyntaxKind::ERROR_TOKEN, 1), Some(err))
882 }
883 }
884
885 #[inline]
886 fn read_slash(&mut self) -> LexerReturn {
887 let start = self.cur;
888 match self.bytes.get(self.cur + 1) {
889 Some(b'*') => {
890 self.next();
891 while let Some(b) = self.next().copied() {
892 match b {
893 b'*' if self.bytes.get(self.cur + 1) == Some(&b'/') => {
894 self.advance(2);
895 return tok!(COMMENT, self.cur - start);
896 }
897 _ => {}
898 }
899 }
900
901 let err = Diagnostic::error(self.file_id, "", "unterminated block comment")
902 .primary(self.cur..self.cur + 1, "... but the file ends here")
903 .secondary(start..start + 2, "A block comment starts here");
904
905 (Token::new(SyntaxKind::COMMENT, self.cur - start), Some(err))
906 }
907 Some(b'/') => {
908 self.next();
909 while self.next().is_some() {
910 let chr = self.get_unicode_char();
911
912 if is_linebreak(chr) {
913 return tok!(COMMENT, self.cur - start);
914 }
915 self.cur += chr.len_utf8() - 1;
916 }
917 tok!(COMMENT, self.cur - start)
918 }
919 _ if self.state.expr_allowed => self.read_regex(),
920 Some(b'=') => {
921 self.advance(2);
922 tok!(SLASHEQ, self.cur - start)
923 }
924 _ => self.eat(tok![/]),
925 }
926 }
927
928 #[inline]
929 fn flag_err(&self, flag: char) -> Diagnostic {
930 Diagnostic::error(self.file_id, "", format!("duplicate flag `{}`", flag))
931 .primary(self.cur..self.cur + 1, "this flag was already used")
932 }
933
    /// Lexes a regex literal whose opening `/` is under the cursor, including
    /// its trailing flags.
    ///
    /// Always yields a `REGEX` token; the accompanying diagnostic reports the
    /// first duplicate or invalid flag, a dangling escape at the end of the
    /// input, or an unterminated literal.
    #[inline]
    #[allow(clippy::many_single_char_names)]
    fn read_regex(&mut self) -> LexerReturn {
        let start = self.cur;
        // Whether the cursor is inside a `[...]` character class, where `/`
        // does not terminate the literal.
        let mut in_class = false;
        let mut diagnostic = None;

        unwind_loop! {
            match self.next() {
                Some(b'[') => in_class = true,
                Some(b']') => in_class = false,
                Some(b'/') => {
                    if !in_class {
                        // One "seen" marker per supported flag.
                        let (mut g, mut i, mut m, mut s, mut u, mut y) = (false, false, false, false, false, false);

                        unwind_loop! {
                            let next = self.next_bounded().copied();
                            let chr_start = self.cur;
                            match next {
                                Some(b'g') => {
                                    if g && diagnostic.is_none() {
                                        diagnostic = Some(self.flag_err('g'))
                                    }
                                    g = true;
                                },
                                Some(b'i') => {
                                    if i && diagnostic.is_none() {
                                        diagnostic = Some(self.flag_err('i'))
                                    }
                                    i = true;
                                },
                                Some(b'm') => {
                                    if m && diagnostic.is_none() {
                                        diagnostic = Some(self.flag_err('m'))
                                    }
                                    m = true;
                                },
                                Some(b's') => {
                                    if s && diagnostic.is_none() {
                                        diagnostic = Some(self.flag_err('s'))
                                    }
                                    s = true;
                                },
                                Some(b'u') => {
                                    if u && diagnostic.is_none() {
                                        diagnostic = Some(self.flag_err('u'))
                                    }
                                    u = true;
                                },
                                Some(b'y') => {
                                    if y && diagnostic.is_none() {
                                        diagnostic = Some(self.flag_err('y'))
                                    }
                                    y = true;
                                },
                                // Any other identifier part is consumed as an
                                // invalid flag.
                                Some(_) if self.cur_ident_part().is_some() => {
                                    if diagnostic.is_none() {
                                        diagnostic = Some(Diagnostic::error(self.file_id, "", "invalid regex flag")
                                            .primary(chr_start .. self.cur + 1, "this is not a valid regex flag"));
                                    }
                                },
                                // End of the flags (or of the input).
                                _ => {
                                    return (Token::new(SyntaxKind::REGEX, self.cur - start), diagnostic)
                                }
                            }
                        }
                    }
                },
                Some(b'\\') => {
                    // Skip the escaped byte so that e.g. `\/` does not end the
                    // literal.
                    if self.next_bounded().is_none() {
                        let err = Diagnostic::error(self.file_id, "", "expected a character after a regex escape, but found none")
                            .primary(self.cur..self.cur + 1, "expected a character following this");

                        return (Token::new(SyntaxKind::REGEX, self.cur - start), Some(err));
                    }
                },
                None => {
                    let err = Diagnostic::error(self.file_id, "", "unterminated regex literal")
                        .primary(self.cur..self.cur, "...but the file ends here")
                        .secondary(start..start + 1, "a regex literal starts there...");

                    return (Token::new(SyntaxKind::REGEX, self.cur - start), Some(err));
                },
                _ => {},
            }
        }
    }
1023
1024 #[inline]
1025 fn bin_or_assign(&mut self, bin: SyntaxKind, assign: SyntaxKind) -> LexerReturn {
1026 if let Some(b'=') = self.next() {
1027 self.next();
1028 (Token::new(assign, 2), None)
1029 } else {
1030 (Token::new(bin, 1), None)
1031 }
1032 }
1033
1034 #[inline]
1035 fn resolve_bang(&mut self) -> LexerReturn {
1036 match self.next() {
1037 Some(b'=') => {
1038 if let Some(b'=') = self.next() {
1039 self.next();
1040 tok!(NEQ2, 3)
1041 } else {
1042 tok!(NEQ, 2)
1043 }
1044 }
1045 _ => tok!(!),
1046 }
1047 }
1048
1049 #[inline]
1050 fn resolve_amp(&mut self) -> LexerReturn {
1051 match self.next() {
1052 Some(b'&') => {
1053 if let Some(b'=') = self.next() {
1054 self.next();
1055 tok!(AMP2EQ, 3)
1056 } else {
1057 tok!(AMP2, 2)
1058 }
1059 }
1060 Some(b'=') => {
1061 self.next();
1062 tok!(AMPEQ, 2)
1063 }
1064 _ => tok!(&),
1065 }
1066 }
1067
1068 #[inline]
1069 fn resolve_plus(&mut self) -> LexerReturn {
1070 match self.next() {
1071 Some(b'+') => {
1072 self.next();
1073 tok!(PLUS2, 2)
1074 }
1075 Some(b'=') => {
1076 self.next();
1077 tok!(PLUSEQ, 2)
1078 }
1079 _ => tok!(+),
1080 }
1081 }
1082
1083 #[inline]
1084 fn resolve_minus(&mut self) -> LexerReturn {
1085 match self.next() {
1086 Some(b'-') => {
1087 self.next();
1088 tok!(MINUS2, 2)
1089 }
1090 Some(b'=') => {
1091 self.next();
1092 tok!(MINUSEQ, 2)
1093 }
1094 _ => tok!(-),
1095 }
1096 }
1097
1098 #[inline]
1099 fn resolve_less_than(&mut self) -> LexerReturn {
1100 match self.next() {
1101 Some(b'<') => {
1102 if let Some(b'=') = self.next() {
1103 self.next();
1104 tok!(SHLEQ, 3)
1105 } else {
1106 tok!(SHL, 2)
1107 }
1108 }
1109 Some(b'=') => {
1110 self.next();
1111 tok!(LTEQ, 2)
1112 }
1113 _ => tok!(<),
1114 }
1115 }
1116
1117 #[inline]
1118 fn resolve_greater_than(&mut self) -> LexerReturn {
1119 match self.next() {
1120 Some(b'>') => {
1121 if let Some(b'>') = self.bytes.get(self.cur + 1).copied() {
1122 if let Some(b'=') = self.bytes.get(self.cur + 2).copied() {
1123 self.advance(3);
1124 tok!(USHREQ, 4)
1125 } else {
1126 tok!(>)
1127 }
1128 } else if self.bytes.get(self.cur + 1).copied() == Some(b'=') {
1129 self.advance(2);
1130 tok!(SHREQ, 3)
1131 } else {
1132 tok!(>)
1133 }
1134 }
1135 Some(b'=') => {
1136 self.next();
1137 tok!(GTEQ, 2)
1138 }
1139 _ => tok!(>),
1140 }
1141 }
1142
1143 #[inline]
1144 fn resolve_eq(&mut self) -> LexerReturn {
1145 match self.next() {
1146 Some(b'=') => {
1147 if let Some(b'=') = self.next() {
1148 self.next();
1149 tok!(EQ3, 3)
1150 } else {
1151 tok!(EQ2, 2)
1152 }
1153 }
1154 Some(b'>') => {
1155 self.next();
1156 tok!(FAT_ARROW, 2)
1157 }
1158 _ => tok!(=),
1159 }
1160 }
1161
1162 #[inline]
1163 fn resolve_pipe(&mut self) -> LexerReturn {
1164 match self.next() {
1165 Some(b'|') => {
1166 if let Some(b'=') = self.next() {
1167 self.next();
1168 tok!(PIPE2EQ, 3)
1169 } else {
1170 tok!(PIPE2, 2)
1171 }
1172 }
1173 Some(b'=') => {
1174 self.next();
1175 tok!(PIPEEQ, 2)
1176 }
1177 _ => tok!(|),
1178 }
1179 }
1180
1181 #[inline]
1183 fn resolve_question(&mut self) -> LexerReturn {
1184 match self.next() {
1185 Some(b'?') => {
1186 if let Some(b'=') = self.next() {
1187 self.next();
1188 tok!(QUESTION2EQ, 3)
1189 } else {
1190 tok!(QUESTION2, 2)
1191 }
1192 }
1193 Some(b'.') => {
1194 if let Some(b'0'..=b'9') = self.bytes.get(self.cur + 1) {
1196 tok!(?)
1197 } else {
1198 self.next();
1199 tok!(QUESTIONDOT, 2)
1200 }
1201 }
1202 _ => tok!(?),
1203 }
1204 }
1205
1206 #[inline]
1207 fn resolve_star(&mut self) -> LexerReturn {
1208 match self.next() {
1209 Some(b'*') => {
1210 if let Some(b'=') = self.next() {
1211 self.next();
1212 tok!(STAR2EQ, 3)
1213 } else {
1214 tok!(STAR2, 2)
1215 }
1216 }
1217 Some(b'=') => {
1218 self.next();
1219 tok!(STAREQ, 2)
1220 }
1221 _ => tok!(*),
1222 }
1223 }
1224
    /// Lexes one token outside of a template literal.
    ///
    /// The byte under the cursor is classified through `DISPATCHER` and the
    /// matching routine is invoked. Returns the token together with an
    /// optional diagnostic.
    fn lex_token(&mut self) -> LexerReturn {
        // SAFETY: in the visible code this is only reached from
        // `Iterator::next`, which first checks `self.cur < self.bytes.len()`.
        let byte = unsafe { *self.bytes.get_unchecked(self.cur) };
        let start = self.cur;

        let dispatched = Self::lookup(byte);

        match dispatched {
            WHS => {
                self.consume_whitespace();
                tok!(WHITESPACE, self.cur - start)
            }
            EXL => self.resolve_bang(),
            HAS => self.read_shebang(),
            PRC => self.bin_or_assign(T![%], T![%=]),
            AMP => self.resolve_amp(),
            PNO => self.eat(tok!(L_PAREN, 1)),
            PNC => self.eat(tok!(R_PAREN, 1)),
            MUL => self.resolve_star(),
            PLS => self.resolve_plus(),
            COM => self.eat(tok![,]),
            MIN => self.resolve_minus(),
            SLH => self.read_slash(),
            TPL => self.eat(tok!(BACKTICK, 1)),
            ZER => {
                let diag = self.read_zero();
                let (token, err) = self.verify_number_end(start);
                // An error about a trailing identifier takes precedence over
                // one from inside the literal.
                (token, err.or(diag))
            }
            PRD => {
                // `...`
                if let Some(b"..") = self.bytes.get(self.cur + 1..self.cur + 3) {
                    self.cur += 3;
                    return tok!(DOT2, 3);
                }
                // A dot followed by a digit starts a number such as `.5`.
                if let Some(b'0'..=b'9') = self.bytes.get(self.cur + 1) {
                    let diag = self.read_float();
                    let (token, err) = self.verify_number_end(start);
                    (token, err.or(diag))
                } else {
                    self.eat(tok![.])
                }
            }
            BSL => {
                // A backslash is only accepted as the start of a unicode
                // escape that begins an identifier.
                if self.bytes.get(self.cur + 1) == Some(&b'u') {
                    self.next();
                    let res = if self.bytes.get(self.cur + 1).copied() == Some(b'{') {
                        self.next();
                        self.read_codepoint_escape()
                    } else {
                        self.read_unicode_escape(true)
                    };

                    match res {
                        Ok(chr) => {
                            if is_id_start(chr) {
                                self.resolve_identifier((chr, start))
                            } else {
                                let err = Diagnostic::error(self.file_id, "", "unexpected unicode escape")
                                    .primary(start..self.cur, "this escape is unexpected, as it does not designate the start of an identifier");

                                self.next();
                                (
                                    Token::new(SyntaxKind::ERROR_TOKEN, self.cur - start),
                                    Some(err),
                                )
                            }
                        }
                        Err(err) => (
                            Token::new(SyntaxKind::ERROR_TOKEN, self.cur - start),
                            Some(err),
                        ),
                    }
                } else {
                    let err = Diagnostic::error(
                        self.file_id,
                        "",
                        format!("unexpected token `{}`", byte as char),
                    )
                    .primary(start..self.cur + 1, "");
                    self.next();

                    (Token::new(SyntaxKind::ERROR_TOKEN, 1), Some(err))
                }
            }
            QOT => {
                if let Some(err) = self.read_str_literal() {
                    (
                        Token::new(SyntaxKind::ERROR_TOKEN, self.cur - start),
                        Some(err),
                    )
                } else {
                    tok!(STRING, self.cur - start)
                }
            }
            IDT => self.resolve_identifier((byte as char, start)),
            DIG => {
                let diag = self.read_number();
                let (token, err) = self.verify_number_end(start);
                (token, err.or(diag))
            }
            COL => self.eat(tok![:]),
            SEM => self.eat(tok![;]),
            LSS => self.resolve_less_than(),
            EQL => self.resolve_eq(),
            MOR => self.resolve_greater_than(),
            QST => self.resolve_question(),
            BTO => self.eat(tok!(L_BRACK, 1)),
            BTC => self.eat(tok![R_BRACK, 1]),
            CRT => self.bin_or_assign(T![^], T![^=]),
            BEO => self.eat(tok![L_CURLY, 1]),
            BEC => self.eat(tok![R_CURLY, 1]),
            PIP => self.resolve_pipe(),
            TLD => self.eat(tok![~]),
            UNI => {
                // Non-ASCII input: whitespace, an identifier, or an error.
                let chr = self.get_unicode_char();
                if UNICODE_WHITESPACE_STARTS.contains(&byte)
                    && (is_linebreak(chr) || UNICODE_SPACES.contains(&chr))
                {
                    if is_linebreak(chr) {
                        self.state.had_linebreak = true;
                    }

                    // Rest on the last byte of the character before handing
                    // over to `consume_whitespace`, which starts with the
                    // following byte.
                    self.cur += chr.len_utf8() - 1;
                    self.consume_whitespace();
                    tok!(WHITESPACE, self.cur - start)
                } else {
                    self.cur += chr.len_utf8() - 1;
                    if is_id_start(chr) {
                        self.resolve_identifier((chr, start))
                    } else {
                        let err = Diagnostic::error(
                            self.file_id,
                            "",
                            format!("Unexpected token `{}`", chr as char),
                        )
                        .primary(start..self.cur + 1, "");
                        self.next();

                        (
                            Token::new(SyntaxKind::ERROR_TOKEN, self.cur - start),
                            Some(err),
                        )
                    }
                }
            }
            AT_ => self.eat(tok![@]),
            // Every remaining category is reported as an unexpected one-byte
            // token.
            _ => {
                let err = Diagnostic::error(
                    self.file_id,
                    "",
                    format!("unexpected token `{}`", byte as char),
                )
                .primary(start..self.cur + 1, "");
                self.next();

                (Token::new(SyntaxKind::ERROR_TOKEN, 1), Some(err))
            }
        }
    }
1390
1391 fn lex_template(&mut self) -> LexerReturn {
1392 let start = self.cur;
1393 let mut diagnostic = None;
1394
1395 while let Some(b) = self.bytes.get(self.cur) {
1396 match *b as char {
1397 '`' if self.cur == start => {
1398 self.next();
1399 return tok!(BACKTICK, 1);
1400 }
1401 '`' => {
1402 return (
1403 Token::new(SyntaxKind::TEMPLATE_CHUNK, self.cur - start),
1404 diagnostic,
1405 );
1406 }
1407 '\\' => {
1408 if let Some(err) = self.validate_escape_sequence() {
1409 diagnostic = Some(err);
1410 }
1411 self.next_bounded();
1412 }
1413 '$' if self.bytes.get(self.cur + 1) == Some(&b'{') && self.cur == start => {
1414 self.advance(2);
1415 return (Token::new(SyntaxKind::DOLLARCURLY, 2), diagnostic);
1416 }
1417 '$' if self.bytes.get(self.cur + 1) == Some(&b'{') => {
1418 return (
1419 Token::new(SyntaxKind::TEMPLATE_CHUNK, self.cur - start),
1420 diagnostic,
1421 )
1422 }
1423 _ => {
1424 let _ = self.next();
1425 }
1426 }
1427 }
1428
1429 let err = Diagnostic::error(self.file_id, "", "unterminated template literal")
1430 .primary(self.cur..self.cur + 1, "");
1431
1432 (
1433 Token::new(SyntaxKind::TEMPLATE_CHUNK, self.cur - start),
1434 Some(err),
1435 )
1436 }
1437}
1438
/// Returns `true` if `chr` is a line terminator: line feed, carriage return,
/// U+2028 (line separator) or U+2029 (paragraph separator).
pub fn is_linebreak(chr: char) -> bool {
    matches!(chr, '\n' | '\r' | '\u{2028}' | '\u{2029}')
}
1443
1444impl Iterator for Lexer<'_> {
1445 type Item = LexerReturn;
1446
1447 fn next(&mut self) -> Option<Self::Item> {
1448 if self.cur >= self.bytes.len() {
1449 if !self.returned_eof {
1450 self.returned_eof = true;
1451 return Some(tok!(EOF, 0));
1452 }
1453 return None;
1454 }
1455
1456 let token = if self.state.is_in_template() {
1457 self.lex_template()
1458 } else {
1459 self.lex_token()
1460 };
1461
1462 if ![
1463 SyntaxKind::COMMENT,
1464 SyntaxKind::WHITESPACE,
1465 SyntaxKind::TEMPLATE_CHUNK,
1466 ]
1467 .contains(&token.0.kind)
1468 {
1469 self.state.update(token.0.kind);
1470 }
1471 Some(token)
1472 }
1473}
1474
/// Category of an input byte.
///
/// `DISPATCHER` assigns one of these to each of the 256 possible byte values
/// and `Lexer::lex_token` matches on it to choose the lexing routine. `UNI`
/// covers every byte from `0x80` upwards, and `ERR` marks bytes that cannot
/// start a token.
#[allow(non_camel_case_types, clippy::upper_case_acronyms)]
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
#[repr(u8)]
enum Dispatch {
    ERR,
    WHS,
    EXL,
    QOT,
    IDT,
    HAS,
    PRC,
    AMP,
    PNO,
    PNC,
    MUL,
    PLS,
    COM,
    MIN,
    PRD,
    SLH,
    ZER,
    DIG,
    COL,
    SEM,
    LSS,
    EQL,
    MOR,
    QST,
    AT_,
    BTO,
    BSL,
    BTC,
    CRT,
    TPL,
    BEO,
    PIP,
    BEC,
    TLD,
    UNI,
}
1516use Dispatch::*;
1517
/// Lookup table from a byte value (used as the index) to its `Dispatch`
/// category. Laid out below as 16 rows of 16 entries; the row comments give
/// the byte value of the first entry in each row.
static DISPATCHER: [Dispatch; 256] = [
    // 0x00
    ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, WHS, WHS, WHS, WHS, WHS, ERR, ERR,
    // 0x10
    ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR,
    // 0x20
    WHS, EXL, QOT, HAS, IDT, PRC, AMP, QOT, PNO, PNC, MUL, PLS, COM, MIN, PRD, SLH,
    // 0x30
    ZER, DIG, DIG, DIG, DIG, DIG, DIG, DIG, DIG, DIG, COL, SEM, LSS, EQL, MOR, QST,
    // 0x40
    AT_, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT,
    // 0x50
    IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, BTO, BSL, BTC, CRT, IDT,
    // 0x60
    TPL, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT,
    // 0x70
    IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, BEO, PIP, BEC, TLD, ERR,
    // 0x80
    UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI,
    // 0x90
    UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI,
    // 0xA0
    UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI,
    // 0xB0
    UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI,
    // 0xC0
    UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI,
    // 0xD0
    UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI,
    // 0xE0
    UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI,
    // 0xF0
    UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI,
];