#[macro_use]
mod token;
mod highlight;
mod labels;
mod state;
mod tests;
#[rustfmt::skip]
mod tables;
pub use token::Token;
#[cfg(feature = "highlight")]
pub use highlight::*;
use rslint_errors::Diagnostic;
use state::LexerState;
use tables::derived_property::*;
pub use rslint_syntax::*;
/// A lexed token plus an optional diagnostic raised while lexing it.
pub type LexerReturn = (Token, Option<Diagnostic>);
/// Manually unrolls a hot loop: the body is emitted five times, then again
/// five times inside a real `loop`. The body must diverge (`return`/`break`)
/// for the loop to terminate.
macro_rules! unwind_loop {
    ($($iter:tt)*) => {
        $($iter)*
        $($iter)*
        $($iter)*
        $($iter)*
        $($iter)*
        loop {
            $($iter)*
            $($iter)*
            $($iter)*
            $($iter)*
            $($iter)*
        }
    };
}
// First bytes of the UTF-8 encodings of non-ascii whitespace chars; used as a
// cheap pre-filter before decoding a full char.
const UNICODE_WHITESPACE_STARTS: [u8; 5] = [
    0xC2, // leads U+00A0 (NBSP)
    0xEF, // leads U+FEFF (BOM)
    0xE1, // leads U+1680 (ogham space mark)
    0xE2, // leads U+2000..U+2FFF (en quad .. MMSP, LS/PS)
    0xE3, // leads U+3000 (ideographic space)
];
// Every non-ascii char the lexer accepts as (non-linebreak) whitespace after
// the `UNICODE_WHITESPACE_STARTS` pre-filter matched.
const UNICODE_SPACES: [char; 16] = [
    '\u{00A0}', '\u{1680}', '\u{2000}', '\u{2001}', '\u{2002}', '\u{2003}', '\u{2004}', '\u{2005}',
    '\u{2006}', '\u{2007}', '\u{2008}', '\u{2009}', '\u{200A}', '\u{202F}', '\u{205F}', '\u{3000}',
];
/// Tests whether `c` may start an identifier, per the generated Unicode
/// `ID_Start` table.
fn is_id_start(c: char) -> bool {
    ID_Start(c)
}
/// Tests whether `c` may continue an identifier, per the generated Unicode
/// `ID_Continue` table.
fn is_id_continue(c: char) -> bool {
    ID_Continue(c)
}
/// A byte-oriented lexer over a single source file.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Lexer<'src> {
    bytes: &'src [u8],    // raw source; must be valid UTF-8 (see `from_bytes`)
    cur: usize,           // byte offset of the cursor into `bytes`
    state: LexerState,    // lexing context (templates, `/` disambiguation, linebreaks)
    pub file_id: usize,   // file id attached to every emitted diagnostic
    returned_eof: bool,   // whether the trailing zero-length EOF token was yielded
}
impl<'src> Lexer<'src> {
/// Creates a lexer over raw bytes.
///
/// # Safety
/// `bytes` must be valid UTF-8: chars are decoded with
/// `str::from_utf8_unchecked` (see `get_unicode_char`).
pub unsafe fn from_bytes(bytes: &'src [u8], file_id: usize) -> Self {
    Self {
        bytes,
        cur: 0,
        file_id,
        state: LexerState::new(),
        returned_eof: false,
    }
}
pub fn from_str(string: &'src str, file_id: usize) -> Self {
Self {
bytes: string.as_bytes(),
cur: 0,
file_id,
state: LexerState::new(),
returned_eof: false,
}
}
/// Advances one byte and returns `tok` unchanged — helper for
/// single-character tokens.
fn eat(&mut self, tok: LexerReturn) -> LexerReturn {
    self.next();
    tok
}
/// Eats ascii and unicode whitespace, setting `state.had_linebreak` when a
/// line terminator is seen. Returns with the cursor on the first
/// non-whitespace byte (or past the end).
fn consume_whitespace(&mut self) {
    unwind_loop! {
        if let Some(byte) = self.next().copied() {
            if DISPATCHER[byte as usize] != Dispatch::WHS {
                // Not ascii whitespace; it may still be a multibyte unicode
                // space whose lead byte is in UNICODE_WHITESPACE_STARTS.
                if byte > 0xC1 && UNICODE_WHITESPACE_STARTS.contains(&byte) {
                    let chr = self.get_unicode_char();
                    if is_linebreak(chr) {
                        self.state.had_linebreak = true;
                    }
                    if !UNICODE_SPACES.contains(&chr) {
                        return;
                    }
                    // Skip the continuation bytes of the multibyte char.
                    self.cur += chr.len_utf8() - 1;
                } else {
                    return;
                }
            }
            // Ascii line terminators also count as linebreaks.
            if is_linebreak(byte as char) {
                self.state.had_linebreak = true;
            }
        } else {
            return;
        }
    }
}
/// Decodes the char under the cursor without advancing.
fn get_unicode_char(&self) -> char {
    debug_assert!(self.cur < self.bytes.len());
    // SAFETY: the input is valid UTF-8 (constructor invariant) and the
    // cursor is in bounds per the assert above.
    let rest = unsafe { std::str::from_utf8_unchecked(self.bytes.get_unchecked(self.cur..)) };
    match rest.chars().next() {
        Some(chr) => chr,
        // SAFETY: a non-empty `str` always yields at least one char.
        None => unsafe { core::hint::unreachable_unchecked() },
    }
}
#[inline]
fn next(&mut self) -> Option<&u8> {
    // Pre-increment: advance first, then return the byte now under the
    // cursor; `None` once the cursor steps past the end.
    self.cur += 1;
    self.bytes.get(self.cur)
}
#[inline]
fn next_bounded(&mut self) -> Option<&u8> {
    // Like `next`, but never moves the cursor more than one past the last
    // byte, so subsequent reads stay clamped at `bytes.len()`.
    match self.bytes.get(self.cur + 1) {
        Some(b) => {
            self.cur += 1;
            Some(b)
        }
        None => {
            if self.cur != self.bytes.len() {
                self.cur += 1;
            }
            None
        }
    }
}
/// Moves the cursor forward by `amount` bytes without bounds checking.
fn advance(&mut self, amount: usize) {
    self.cur += amount;
}
/// Maps a source byte to its dispatch handler.
fn lookup(byte: u8) -> Dispatch {
    // SAFETY: a u8 is at most 255 and DISPATCHER has exactly 256 entries.
    unsafe { *DISPATCHER.get_unchecked(byte as usize) }
}
fn read_codepoint_escape(&mut self) -> Result<char, Diagnostic> {
let start = self.cur + 1;
self.read_hexnumber();
if self.bytes.get(self.cur) != Some(&b'}') {
let invalid = self.get_unicode_char();
let err = Diagnostic::error(self.file_id, "", "expected hex digits for a unicode code point escape, but encountered an invalid character")
.primary(self.cur .. invalid.len_utf8(), "");
return Err(err);
}
let digits_str = unsafe {
debug_assert!(self.bytes.get(start..self.cur).is_some());
debug_assert!(std::str::from_utf8(self.bytes.get_unchecked(start..self.cur)).is_ok());
std::str::from_utf8_unchecked(self.bytes.get_unchecked(start..self.cur))
};
match u32::from_str_radix(digits_str, 16) {
Ok(digits) if digits <= 0x10FFFF => {
let res = std::char::from_u32(digits);
if let Some(chr) = res {
Ok(chr)
} else {
let err =
Diagnostic::error(self.file_id, "", "invalid codepoint for unicode escape")
.primary(start..self.cur, "");
Err(err)
}
}
_ => {
let err = Diagnostic::error(
self.file_id,
"",
"out of bounds codepoint for unicode codepoint escape sequence",
)
.primary(start..self.cur, "")
.footer_note("Codepoints range from 0 to 0x10FFFF (1114111)");
Err(err)
}
}
}
/// Reads exactly four hex digits following `\u`, with the cursor on the `u`.
/// When `advance` is false the cursor is restored before returning, so
/// callers can peek without consuming.
fn read_unicode_escape(&mut self, advance: bool) -> Result<char, Diagnostic> {
    debug_assert_eq!(self.bytes[self.cur], b'u');
    let diagnostic = Diagnostic::error(
        self.file_id,
        "",
        "invalid digits after unicode escape sequence",
    )
    .primary(
        self.cur - 1..self.cur + 1,
        "expected 4 hex digits following this",
    );
    for idx in 0..4 {
        match self.next_bounded() {
            None => {
                if !advance {
                    // Restore the cursor for non-consuming callers.
                    self.cur -= idx + 1;
                }
                return Err(diagnostic);
            }
            Some(b) if !(*b as u8).is_ascii_hexdigit() => {
                if !advance {
                    self.cur -= idx + 1;
                }
                return Err(diagnostic);
            }
            _ => {}
        }
    }
    // SAFETY: the loop verified the previous 4 bytes are ascii hex digits,
    // so the slice is in bounds, valid UTF-8, and parses as a u32.
    unsafe {
        let digits_str = std::str::from_utf8_unchecked(
            self.bytes.get_unchecked((self.cur - 3)..(self.cur + 1)),
        );
        if let Ok(digits) = u32::from_str_radix(digits_str, 16) {
            if !advance {
                // Rewind to the `u`.
                self.cur -= 4;
            }
            // NOTE(review): four hex digits max out at 0xFFFF, but surrogate
            // values (0xD800..=0xDFFF) are not valid `char`s; this relies on
            // such escapes never reaching `from_u32_unchecked` — confirm.
            Ok(std::char::from_u32_unchecked(digits))
        } else {
            core::hint::unreachable_unchecked();
        }
    }
}
/// Validates the two hex digits of a `\xXX` escape, with the cursor on the
/// `x`. Returns a diagnostic when either digit is missing or non-hex.
fn validate_hex_escape(&mut self) -> Option<Diagnostic> {
    debug_assert_eq!(self.bytes[self.cur], b'x');
    let diagnostic =
        Diagnostic::error(self.file_id, "", "invalid digits after hex escape sequence")
            .primary(
                (self.cur - 1)..(self.cur + 1),
                "Expected 2 hex digits following this",
            );
    for _ in 0..2 {
        match self.next_bounded() {
            Some(b) if b.is_ascii_hexdigit() => {}
            _ => return Some(diagnostic),
        }
    }
    None
}
/// Validates the escape following a backslash (cursor on the `\`) and
/// advances past it, returning a diagnostic if it is malformed.
fn validate_escape_sequence(&mut self) -> Option<Diagnostic> {
    let cur = self.cur;
    if let Some(escape) = self.bytes.get(self.cur + 1) {
        match escape {
            // `\u{...}` codepoint escape
            b'u' if self.bytes.get(self.cur + 2) == Some(&b'{') => {
                self.advance(2);
                self.read_codepoint_escape().err()
            }
            // `\uXXXX` escape
            b'u' => {
                self.next();
                self.read_unicode_escape(true).err()
            }
            // `\xXX` escape
            b'x' => {
                self.next();
                self.validate_hex_escape()
            }
            // Any other escaped char is accepted; step past the backslash.
            _ => {
                let chr = self.get_unicode_char();
                self.cur += chr.len_utf8();
                None
            }
        }
    } else {
        // The backslash is the last byte of the input.
        Some(Diagnostic::error(self.file_id, "", "").primary(
            cur..cur + 1,
            "expected an escape sequence following a backslash, but found none",
        ))
    }
}
#[inline]
fn consume_ident(&mut self) {
    // Eats identifier-continue chars until a non-ident char or EOF; leaves
    // the cursor on the first byte past the identifier.
    unwind_loop! {
        if self.next_bounded().is_some() {
            if !self.cur_is_ident_part() {
                return;
            }
        } else {
            return;
        }
    }
}
/// Lexes a string literal with the cursor on the opening quote, validating
/// escape sequences along the way. Note: each escape overwrites `diagnostic`,
/// so only the most recent escape's result (possibly `None`) survives.
fn read_str_literal(&mut self) -> Option<Diagnostic> {
    // SAFETY: `lex_token` only dispatches here with the cursor on a quote.
    let quote = unsafe { *self.bytes.get_unchecked(self.cur) };
    let start = self.cur;
    let mut diagnostic = None;
    while let Some(byte) = self.next_bounded() {
        match *byte {
            b'\\' => {
                diagnostic = self.validate_escape_sequence();
            }
            b if b == quote => {
                self.next();
                return diagnostic;
            }
            _ => {}
        }
    }
    // Ran off the end of the input without a closing quote.
    let unterminated = Diagnostic::error(self.file_id, "", "unterminated string literal")
        .primary(self.cur..self.cur, "input ends here")
        .secondary(start..start + 1, "string literal starts here");
    Some(unterminated)
}
#[inline]
fn cur_is_ident_part(&mut self) -> bool {
    // Tests whether the byte under the cursor continues an identifier.
    // Multibyte chars and `\u` escapes advance the cursor to their last
    // byte when they match; on failure the cursor is restored.
    debug_assert!(self.cur < self.bytes.len());
    let b = unsafe { self.bytes.get_unchecked(self.cur) };
    match Self::lookup(*b) {
        // Ascii ident chars, digits, and keyword-start letters all continue.
        IDT | DIG | ZER | L_A | L_B | L_C | L_D | L_E | L_F | L_I | L_N | L_R | L_S | L_T
        | L_V | L_W | L_Y => true,
        UNI => {
            let res = is_id_continue(self.get_unicode_char());
            if res {
                // Step over the continuation bytes.
                self.cur += self.get_unicode_char().len_utf8() - 1;
            }
            res
        }
        // A `\u`/`\u{...}` escape may also encode an ident-continue char.
        BSL if self.bytes.get(self.cur + 1) == Some(&b'u') => {
            let start = self.cur;
            self.next();
            let res = if self.bytes.get(self.cur + 1).copied() == Some(b'{') {
                self.next();
                self.read_codepoint_escape()
            } else {
                self.read_unicode_escape(true)
            };
            if let Ok(c) = res {
                if is_id_continue(c) {
                    self.cur += c.len_utf8() - 1;
                    true
                } else {
                    self.cur -= 1;
                    false
                }
            } else {
                // Malformed escape: rewind to the backslash.
                self.cur = start;
                false
            }
        }
        _ => false,
    }
}
#[inline]
fn cur_is_ident_start(&mut self) -> bool {
    // Tests whether the byte under the cursor starts an identifier; same
    // cursor conventions as `cur_is_ident_part`.
    debug_assert!(self.cur < self.bytes.len());
    let b = unsafe { self.bytes.get_unchecked(self.cur) };
    match Self::lookup(*b) {
        // A `\uXXXX` escape encoding an id-start char begins an identifier.
        BSL if self.bytes.get(self.cur + 1) == Some(&b'u') => {
            self.next();
            if let Ok(chr) = self.read_unicode_escape(false) {
                if is_id_start(chr) {
                    // Jump past `u` plus the four hex digits.
                    self.advance(5);
                    return true;
                }
            }
            // Rewind to the backslash.
            self.cur -= 1;
            false
        }
        UNI => {
            let chr = self.get_unicode_char();
            if is_id_start(chr) {
                self.cur += chr.len_utf8() - 1;
                true
            } else {
                false
            }
        }
        IDT | L_A | L_B | L_C | L_D | L_E | L_F | L_I | L_N | L_R | L_S | L_T | L_V | L_W
        | L_Y => true,
        _ => false,
    }
}
#[inline]
fn resolve_label(&mut self, label: Dispatch) -> LexerReturn {
    let start = self.cur;
    // Try to match a keyword beginning with this letter; `None` means the
    // text can only be a plain identifier.
    let kind = match label {
        L_A => self.resolve_label_a(),
        L_B => self.resolve_label_b(),
        L_C => self.resolve_label_c(),
        L_D => self.resolve_label_d(),
        L_E => self.resolve_label_e(),
        L_F => self.resolve_label_f(),
        L_I => self.resolve_label_i(),
        L_N => self.resolve_label_n(),
        L_R => self.resolve_label_r(),
        L_S => self.resolve_label_s(),
        L_T => self.resolve_label_t(),
        L_V => self.resolve_label_v(),
        L_W => self.resolve_label_w(),
        L_Y => self.resolve_label_y(),
        _ => unsafe { core::hint::unreachable_unchecked() },
    };
    match kind {
        Some(syntax_kind) => {
            // A keyword only counts if no ident chars follow it
            // (e.g. `ifx` is an identifier, not `if` + `x`).
            if self.next_bounded().is_some() && self.cur_is_ident_part() {
                self.consume_ident();
                (Token::new(T![ident], self.cur - start), None)
            } else {
                (Token::new(syntax_kind, self.cur - start), None)
            }
        }
        None => {
            self.consume_ident();
            (Token::new(T![ident], self.cur - start), None)
        }
    }
}
#[inline]
fn special_number_start<F: Fn(char) -> bool>(&mut self, func: F) -> bool {
    // Peek two bytes ahead (past the `0` and the radix letter); only commit
    // the cursor when the byte there is a digit of the requested radix.
    let valid = matches!(self.bytes.get(self.cur + 2), Some(&b) if func(b as char));
    if valid {
        self.cur += 1;
    }
    valid
}
#[inline]
fn maybe_bigint(&mut self) {
    // Consumes a trailing `n` (BigInt suffix) if present.
    if let Some(b'n') = self.bytes.get(self.cur) {
        self.next();
    }
}
#[inline]
fn read_zero(&mut self) {
    // Lexes a number starting with `0`: hex/binary/octal literals, a bigint,
    // a float, an exponent, or a plain decimal number.
    match self.bytes.get(self.cur + 1) {
        Some(b'x') | Some(b'X') => {
            if self.special_number_start(|c| c.is_ascii_hexdigit()) {
                self.read_hexnumber();
                self.maybe_bigint();
            } else {
                // Lone `0x` with no digit: leave it for error recovery.
                self.next();
            }
        }
        Some(b'b') | Some(b'B') => {
            if self.special_number_start(|c| c == '0' || c == '1') {
                self.read_bindigits();
                self.maybe_bigint();
            } else {
                self.next();
            }
        }
        Some(b'o') | Some(b'O') => {
            if self.special_number_start(|c| ('0'..='7').contains(&c)) {
                self.read_octaldigits();
                self.maybe_bigint();
            } else {
                self.next();
            }
        }
        // `0n` bigint
        Some(b'n') => {
            self.cur += 2;
        }
        // `0.xxx` float
        Some(b'.') => {
            self.cur += 1;
            self.read_float();
        }
        // `0e…` exponent; only valid when actual digits follow the sign.
        Some(b'e') | Some(b'E') => {
            match self.bytes.get(self.cur + 2) {
                Some(b'-') | Some(b'+') => {
                    if let Some(b'0'..=b'9') = self.bytes.get(self.cur + 3) {
                        self.next();
                        self.read_exponent();
                    }
                }
                Some(b'0'..=b'9') => self.read_exponent(),
                _ => {
                    self.next();
                }
            }
        }
        _ => self.read_number(),
    }
}
#[inline]
fn read_hexnumber(&mut self) {
    // Eats hex digits until the first non-hex byte or EOF.
    unwind_loop! {
        if let Some(b) = self.next_bounded() {
            if !(*b as char).is_ascii_hexdigit() {
                return;
            }
        } else {
            return;
        }
    }
}
#[inline]
fn read_number(&mut self) {
    // Eats a decimal number, handing off to `read_float` at a `.`, to
    // `read_exponent` at a valid `e`/`E`, and consuming a bigint `n` suffix.
    unwind_loop! {
        match self.next_bounded() {
            Some(b'0'..=b'9') => {},
            Some(b'.') => {
                return self.read_float();
            },
            Some(b'e') | Some(b'E') => {
                // An exponent only counts when digits follow the (optional)
                // sign; otherwise the `e` belongs to whatever comes next.
                match self.bytes.get(self.cur + 1) {
                    Some(b'-') | Some(b'+') => {
                        if let Some(b'0'..=b'9') = self.bytes.get(self.cur + 2) {
                            self.next();
                            return self.read_exponent();
                        } else {
                            return;
                        }
                    },
                    Some(b'0'..=b'9') => return self.read_exponent(),
                    _ => return,
                }
            },
            Some(b'n') => {
                self.next();
                return;
            }
            _ => return,
        }
    }
}
#[inline]
fn read_float(&mut self) {
    // Eats the fractional part of a number; hands off to `read_exponent`
    // when a well-formed exponent follows.
    unwind_loop! {
        match self.next_bounded() {
            Some(b'0'..=b'9') => {},
            Some(b'e') | Some(b'E') => {
                match self.bytes.get(self.cur + 1) {
                    Some(b'-') | Some(b'+') => {
                        if let Some(b'0'..=b'9') = self.bytes.get(self.cur + 2) {
                            self.next();
                            return self.read_exponent();
                        } else {
                            return;
                        }
                    },
                    Some(b'0'..=b'9') => return self.read_exponent(),
                    _ => return,
                }
            },
            _ => return,
        }
    }
}
#[inline]
fn read_exponent(&mut self) {
    // Cursor is on the `e`/`E`; eats an optional sign then the digits.
    if let Some(b'-') | Some(b'+') = self.bytes.get(self.cur + 1) {
        self.next();
    }
    unwind_loop! {
        if let Some(b'0'..=b'9') = self.next() {
        } else {
            return;
        }
    }
}
#[inline]
fn read_bindigits(&mut self) {
    // Eats binary digits until the first non-`0`/`1` byte.
    unwind_loop! {
        if let Some(b'0') | Some(b'1') = self.next() {
        } else {
            return
        }
    }
}
#[inline]
fn read_octaldigits(&mut self) {
    // Eats octal digits until the first byte outside `0..=7`.
    unwind_loop! {
        if let Some(b'0'..=b'7') = self.next() {
        } else {
            return
        }
    }
}
#[inline]
fn verify_number_end(&mut self, start: usize) -> LexerReturn {
    // A number must not be directly followed by an identifier
    // (`123abc` is a single error token, not `123` then `abc`).
    let err_start = self.cur;
    if self.cur < self.bytes.len() && self.cur_is_ident_start() {
        self.consume_ident();
        let err = Diagnostic::error(
            self.file_id,
            "",
            "numbers cannot be followed by identifiers directly after",
        )
        .primary(err_start..self.cur, "an identifier cannot appear here");
        (
            Token::new(SyntaxKind::ERROR_TOKEN, self.cur - start),
            Some(err),
        )
    } else {
        tok!(NUMBER, self.cur - start)
    }
}
#[inline]
fn read_shebang(&mut self) -> LexerReturn {
    // Lexes `#!...` up to the end of the line; `#` is only legal as the very
    // first byte of the file and must be followed by `!`.
    let start = self.cur;
    self.next();
    if start != 0 {
        let err =
            Diagnostic::error(self.file_id, "", "`#` must be at the beginning of the file")
                .primary(start..(start + 1), "but it's found here");
        return (Token::new(SyntaxKind::ERROR_TOKEN, 1), Some(err));
    }
    if let Some(b'!') = self.bytes.get(1) {
        while self.next().is_some() {
            let chr = self.get_unicode_char();
            if is_linebreak(chr) {
                return tok!(SHEBANG, self.cur);
            }
            // Skip multibyte continuation bytes.
            self.cur += chr.len_utf8() - 1;
        }
        tok!(SHEBANG, self.cur)
    } else {
        let err = Diagnostic::error(
            self.file_id,
            "",
            "expected `!` following a `#`, but found none",
        )
        .primary(0usize..1usize, "");
        (Token::new(SyntaxKind::ERROR_TOKEN, 1), Some(err))
    }
}
#[inline]
fn read_slash(&mut self) -> LexerReturn {
    // Disambiguates `/`: block comment, line comment, `/=`, a regex (when
    // the parser state allows an expression here), or plain division.
    let start = self.cur;
    match self.bytes.get(self.cur + 1) {
        Some(b'*') => {
            self.next();
            while let Some(b) = self.next().copied() {
                match b {
                    b'*' if self.bytes.get(self.cur + 1) == Some(&b'/') => {
                        self.advance(2);
                        return tok!(COMMENT, self.cur - start);
                    }
                    _ => {}
                }
            }
            // EOF before `*/`: still emit a comment token plus an error.
            let err = Diagnostic::error(self.file_id, "", "unterminated block comment")
                .primary(self.cur..self.cur + 1, "... but the file ends here")
                .secondary(start..start + 2, "A block comment starts here");
            (Token::new(SyntaxKind::COMMENT, self.cur - start), Some(err))
        }
        Some(b'/') => {
            self.next();
            while self.next().is_some() {
                let chr = self.get_unicode_char();
                if is_linebreak(chr) {
                    return tok!(COMMENT, self.cur - start);
                }
                self.cur += chr.len_utf8() - 1;
            }
            tok!(COMMENT, self.cur - start)
        }
        Some(b'=') => {
            self.advance(2);
            tok!(SLASHEQ, self.cur - start)
        }
        _ if self.state.expr_allowed => self.read_regex(),
        _ => self.eat(tok![/]),
    }
}
#[inline]
fn flag_err(&self, flag: char) -> Diagnostic {
    // Diagnostic for a regex flag appearing more than once, spanning the
    // duplicate occurrence under the cursor.
    Diagnostic::error(self.file_id, "", format!("duplicate flag `{}`", flag))
        .primary(self.cur..self.cur + 1, "this flag was already used")
}
#[inline]
#[allow(clippy::many_single_char_names)]
fn read_regex(&mut self) -> LexerReturn {
    // Lexes a regex literal with the cursor on the opening `/`: the body
    // (tracking `[...]` classes, where `/` is literal), then the flags.
    // Only the first problem found is reported.
    let start = self.cur;
    let mut in_class = false;
    let mut diagnostic = None;
    unwind_loop! {
        match self.next() {
            Some(b'[') => in_class = true,
            Some(b']') => in_class = false,
            Some(b'/') => {
                if !in_class {
                    // End of the body: read the flags, at most one of each.
                    let (mut g, mut i, mut m, mut s, mut u, mut y) = (false, false, false, false, false, false);
                    unwind_loop! {
                        let next = self.next_bounded().copied();
                        match next {
                            Some(b'g') => {
                                if g && diagnostic.is_none() {
                                    diagnostic = Some(self.flag_err('g'))
                                }
                                g = true;
                            },
                            Some(b'i') => {
                                if i && diagnostic.is_none() {
                                    diagnostic = Some(self.flag_err('i'))
                                }
                                i = true;
                            },
                            Some(b'm') => {
                                if m && diagnostic.is_none() {
                                    diagnostic = Some(self.flag_err('m'))
                                }
                                m = true;
                            },
                            Some(b's') => {
                                if s && diagnostic.is_none() {
                                    diagnostic = Some(self.flag_err('s'))
                                }
                                s = true;
                            },
                            Some(b'u') => {
                                if u && diagnostic.is_none() {
                                    diagnostic = Some(self.flag_err('u'))
                                }
                                u = true;
                            },
                            Some(b'y') => {
                                if y && diagnostic.is_none() {
                                    diagnostic = Some(self.flag_err('y'))
                                }
                                y = true;
                            },
                            // Any other ident char is an invalid flag.
                            Some(_) if self.cur_is_ident_part() => {
                                let chr_start = self.cur;
                                self.cur += self.get_unicode_char().len_utf8() - 1;
                                if diagnostic.is_none() {
                                    diagnostic = Some(Diagnostic::error(self.file_id, "", "invalid regex flag")
                                        .primary(chr_start .. self.cur + 1, "this is not a valid regex flag"));
                                }
                            },
                            // Non-ident byte or EOF: the regex is complete.
                            _ => {
                                return (Token::new(SyntaxKind::REGEX, self.cur - start), diagnostic)
                            }
                        }
                    }
                }
            },
            Some(b'\\') => {
                // A backslash must escape something before the input ends.
                if self.next_bounded().is_none() {
                    let err = Diagnostic::error(self.file_id, "", "expected a character after a regex escape, but found none")
                        .primary(self.cur..self.cur + 1, "expected a character following this");
                    return (Token::new(SyntaxKind::REGEX, self.cur - start), Some(err));
                }
            },
            None => {
                let err = Diagnostic::error(self.file_id, "", "unterminated regex literal")
                    .primary(self.cur..self.cur, "...but the file ends here")
                    .secondary(start..start + 1, "a regex literal starts there...");
                return (Token::new(SyntaxKind::REGEX, self.cur - start), Some(err));
            },
            _ => {},
        }
    }
}
#[inline]
fn bin_or_assign(&mut self, bin: SyntaxKind, assign: SyntaxKind) -> LexerReturn {
    // One byte of lookahead decides between `op` and `op=`.
    match self.next() {
        Some(b'=') => {
            self.next();
            (Token::new(assign, 2), None)
        }
        _ => (Token::new(bin, 1), None),
    }
}
#[inline]
fn resolve_bang(&mut self) -> LexerReturn {
    // `!`, `!=`, or `!==`.
    if self.next() == Some(&b'=') {
        if self.next() == Some(&b'=') {
            self.next();
            tok!(NEQ2, 3)
        } else {
            tok!(NEQ, 2)
        }
    } else {
        tok!(!)
    }
}
#[inline]
fn resolve_amp(&mut self) -> LexerReturn {
    // `&`, `&&`, `&&=`, or `&=`.
    match self.next() {
        Some(b'&') => {
            if let Some(b'=') = self.next() {
                self.next();
                tok!(AMP2EQ, 3)
            } else {
                tok!(AMP2, 2)
            }
        }
        Some(b'=') => {
            self.next();
            tok!(AMPEQ, 2)
        }
        _ => tok!(&),
    }
}
#[inline]
fn resolve_plus(&mut self) -> LexerReturn {
    // `+`, `++`, or `+=`.
    let lookahead = self.next().copied();
    if lookahead == Some(b'+') {
        self.next();
        tok!(PLUS2, 2)
    } else if lookahead == Some(b'=') {
        self.next();
        tok!(PLUSEQ, 2)
    } else {
        tok!(+)
    }
}
#[inline]
fn resolve_minus(&mut self) -> LexerReturn {
    // `-`, `--`, or `-=`.
    match self.next() {
        Some(b'-') => {
            self.next();
            tok!(MINUS2, 2)
        }
        Some(b'=') => {
            self.next();
            tok!(MINUSEQ, 2)
        }
        _ => tok!(-),
    }
}
#[inline]
fn resolve_less_than(&mut self) -> LexerReturn {
    // `<`, `<<`, `<<=`, or `<=`.
    match self.next() {
        Some(b'<') => {
            if let Some(b'=') = self.next() {
                self.next();
                tok!(SHLEQ, 3)
            } else {
                tok!(SHL, 2)
            }
        }
        Some(b'=') => {
            self.next();
            tok!(LTEQ, 2)
        }
        _ => tok!(<),
    }
}
#[inline]
fn resolve_greater_than(&mut self) -> LexerReturn {
    // `>`, `>>`, `>>=`, `>>>`, `>>>=`, or `>=`.
    match self.next() {
        Some(b'>') => {
            let next = self.next().copied();
            if let Some(b'>') = next {
                if let Some(b'=') = self.next() {
                    self.next();
                    tok!(USHREQ, 4)
                } else {
                    tok!(USHR, 3)
                }
            } else if next == Some(b'=') {
                self.next();
                tok!(SHREQ, 2)
            } else {
                tok!(SHR, 2)
            }
        }
        Some(b'=') => {
            self.next();
            tok!(GTEQ, 2)
        }
        _ => tok!(>),
    }
}
#[inline]
fn resolve_eq(&mut self) -> LexerReturn {
    // `=`, `==`, `===`, or `=>`.
    match self.next() {
        Some(b'=') => {
            if let Some(b'=') = self.next() {
                self.next();
                tok!(EQ3, 3)
            } else {
                tok!(EQ2, 2)
            }
        }
        Some(b'>') => {
            self.next();
            tok!(FAT_ARROW, 2)
        }
        _ => tok!(=),
    }
}
#[inline]
fn resolve_pipe(&mut self) -> LexerReturn {
    // `|`, `||`, `||=`, or `|=`.
    match self.next() {
        Some(b'|') => {
            if let Some(b'=') = self.next() {
                self.next();
                tok!(PIPE2EQ, 3)
            } else {
                tok!(PIPE2, 2)
            }
        }
        Some(b'=') => {
            self.next();
            tok!(PIPEEQ, 2)
        }
        _ => tok!(|),
    }
}
#[inline]
fn resolve_question(&mut self) -> LexerReturn {
    // `?`, `??`, `??=`, or `?.`.
    match self.next() {
        Some(b'?') => {
            if let Some(b'=') = self.next() {
                self.next();
                tok!(QUESTION2EQ, 3)
            } else {
                tok!(QUESTION2, 2)
            }
        }
        Some(b'.') => {
            // `?.5` must lex as `?` then the number `.5`, not `?.` + `5`.
            if let Some(b'0'..=b'9') = self.bytes.get(self.cur + 1) {
                tok!(?)
            } else {
                self.next();
                tok!(QUESTIONDOT, 2)
            }
        }
        _ => tok!(?),
    }
}
#[inline]
fn resolve_star(&mut self) -> LexerReturn {
    // `*`, `**`, `**=`, or `*=`.
    match self.next() {
        Some(b'*') => {
            if let Some(b'=') = self.next() {
                self.next();
                tok!(STAR2EQ, 3)
            } else {
                tok!(STAR2, 2)
            }
        }
        Some(b'=') => {
            self.next();
            tok!(STAREQ, 2)
        }
        _ => tok!(*),
    }
}
/// Lexes the token starting at the cursor, dispatching on the first byte
/// through the `DISPATCHER` table. The cursor must be in bounds.
fn lex_token(&mut self) -> LexerReturn {
    // SAFETY: the Iterator impl only calls this when `cur < bytes.len()`.
    let byte = unsafe { *self.bytes.get_unchecked(self.cur) };
    let start = self.cur;
    let dispatched = Self::lookup(byte);
    match dispatched {
        WHS => {
            self.consume_whitespace();
            tok!(WHITESPACE, self.cur - start)
        }
        EXL => self.resolve_bang(),
        HAS => self.read_shebang(),
        PRC => self.bin_or_assign(T![%], T![%=]),
        AMP => self.resolve_amp(),
        PNO => self.eat(tok!(L_PAREN, 1)),
        PNC => self.eat(tok!(R_PAREN, 1)),
        MUL => self.resolve_star(),
        PLS => self.resolve_plus(),
        COM => self.eat(tok![,]),
        MIN => self.resolve_minus(),
        SLH => self.read_slash(),
        TPL => self.eat(tok!(BACKTICK, 1)),
        ZER => {
            self.read_zero();
            self.verify_number_end(start)
        }
        PRD => {
            // `...` spread, `.5` float, or a plain `.`
            if let Some(b"..") = self.bytes.get(self.cur + 1..self.cur + 3) {
                self.cur += 3;
                return tok!(DOT2, 3);
            }
            if let Some(b'0'..=b'9') = self.bytes.get(self.cur + 1) {
                self.read_float();
                self.verify_number_end(start)
            } else {
                self.eat(tok![.])
            }
        }
        BSL => {
            // A backslash may only start an identifier via a `\u` escape.
            if self.bytes.get(self.cur + 1) == Some(&b'u') {
                self.next();
                let res = if self.bytes.get(self.cur + 1).copied() == Some(b'{') {
                    self.next();
                    self.read_codepoint_escape()
                } else {
                    self.read_unicode_escape(true)
                };
                match res {
                    Ok(chr) => {
                        if is_id_start(chr) {
                            self.consume_ident();
                            tok!(IDENT, self.cur - start)
                        } else {
                            let err = Diagnostic::error(self.file_id, "", "unexpected unicode escape")
                                .primary(start..self.cur, "this escape is unexpected, as it does not designate the start of an identifier");
                            self.next();
                            (
                                Token::new(SyntaxKind::ERROR_TOKEN, self.cur - start),
                                Some(err),
                            )
                        }
                    }
                    Err(err) => (
                        Token::new(SyntaxKind::ERROR_TOKEN, self.cur - start),
                        Some(err),
                    ),
                }
            } else {
                let err = Diagnostic::error(
                    self.file_id,
                    "",
                    format!("unexpected token `{}`", byte as char),
                )
                .primary(start..self.cur + 1, "");
                self.next();
                (Token::new(SyntaxKind::ERROR_TOKEN, 1), Some(err))
            }
        }
        QOT => {
            if let Some(err) = self.read_str_literal() {
                (
                    Token::new(SyntaxKind::ERROR_TOKEN, self.cur - start),
                    Some(err),
                )
            } else {
                tok!(STRING, self.cur - start)
            }
        }
        IDT => {
            self.consume_ident();
            tok!(IDENT, self.cur - start)
        }
        DIG => {
            self.read_number();
            self.verify_number_end(start)
        }
        COL => self.eat(tok![:]),
        SEM => self.eat(tok![;]),
        LSS => self.resolve_less_than(),
        EQL => self.resolve_eq(),
        MOR => self.resolve_greater_than(),
        QST => self.resolve_question(),
        BTO => self.eat(tok!(L_BRACK, 1)),
        BTC => self.eat(tok![R_BRACK, 1]),
        CRT => self.bin_or_assign(T![^], T![^=]),
        BEO => self.eat(tok![L_CURLY, 1]),
        BEC => self.eat(tok![R_CURLY, 1]),
        PIP => self.resolve_pipe(),
        TLD => self.eat(tok![~]),
        // Letters that may begin a keyword.
        L_A | L_B | L_C | L_D | L_E | L_F | L_I | L_N | L_R | L_S | L_T | L_V | L_W | L_Y => {
            self.resolve_label(dispatched)
        }
        UNI => {
            // Non-ascii byte: unicode whitespace, a unicode identifier, or
            // an error token.
            if UNICODE_WHITESPACE_STARTS.contains(&byte) {
                let chr = self.get_unicode_char();
                if is_linebreak(chr) {
                    self.state.had_linebreak = true;
                }
                self.cur += self.get_unicode_char().len_utf8() - 1;
                self.consume_whitespace();
                tok!(WHITESPACE, self.cur - start)
            } else {
                let chr = self.get_unicode_char();
                self.cur += chr.len_utf8() - 1;
                if is_id_start(chr) {
                    self.consume_ident();
                    tok!(IDENT, self.cur - start)
                } else {
                    let err = Diagnostic::error(
                        self.file_id,
                        "",
                        format!("Unexpected token `{}`", chr as char),
                    )
                    .primary(start..self.cur + 1, "");
                    self.next();
                    (
                        Token::new(SyntaxKind::ERROR_TOKEN, self.cur - start),
                        Some(err),
                    )
                }
            }
        }
        _ => {
            let err = Diagnostic::error(
                self.file_id,
                "",
                format!("unexpected token `{}`", byte as char),
            )
            .primary(start..self.cur + 1, "");
            self.next();
            (Token::new(SyntaxKind::ERROR_TOKEN, 1), Some(err))
        }
    }
}
/// Lexes the next token inside a template literal: a backtick, a `${`
/// delimiter, or a run of template text (`TEMPLATE_CHUNK`).
fn lex_template(&mut self) -> LexerReturn {
    let start = self.cur;
    let mut diagnostic = None;
    while let Some(b) = self.bytes.get(self.cur) {
        match *b as char {
            // A backtick with no preceding chunk text is its own token.
            '`' if self.cur == start => {
                self.next();
                return tok!(BACKTICK, 1);
            }
            // Otherwise the chunk ends just before the backtick.
            '`' => {
                return (
                    Token::new(SyntaxKind::TEMPLATE_CHUNK, self.cur - start),
                    diagnostic,
                );
            }
            '\\' => {
                if let Some(err) = self.validate_escape_sequence() {
                    diagnostic = Some(err);
                }
                self.next_bounded();
            }
            // `${` opens an interpolation: emit it directly at chunk start...
            '$' if self.bytes.get(self.cur + 1) == Some(&b'{') && self.cur == start => {
                self.advance(2);
                return (Token::new(SyntaxKind::DOLLARCURLY, 2), diagnostic);
            }
            // ...or end the chunk right before it.
            '$' if self.bytes.get(self.cur + 1) == Some(&b'{') => {
                return (
                    Token::new(SyntaxKind::TEMPLATE_CHUNK, self.cur - start),
                    diagnostic,
                )
            }
            _ => {
                let _ = self.next();
            }
        }
    }
    // EOF inside the template: emit the chunk plus an error.
    let err = Diagnostic::error(self.file_id, "", "unterminated template literal")
        .primary(self.cur..self.cur + 1, "");
    (
        Token::new(SyntaxKind::TEMPLATE_CHUNK, self.cur - start),
        Some(err),
    )
}
}
/// Returns `true` for the four chars treated as line terminators:
/// LF, CR, LS (U+2028), and PS (U+2029).
pub fn is_linebreak(chr: char) -> bool {
    matches!(chr, '\n' | '\r' | '\u{2028}' | '\u{2029}')
}
impl Iterator for Lexer<'_> {
    type Item = LexerReturn;
    /// Lexes the next token, yielding a single zero-length EOF token once the
    /// input is exhausted, then `None` forever after.
    fn next(&mut self) -> Option<Self::Item> {
        if self.cur >= self.bytes.len() {
            if !self.returned_eof {
                self.returned_eof = true;
                return Some(tok!(EOF, 0));
            }
            return None;
        }
        let token = if self.state.is_in_template() {
            self.lex_template()
        } else {
            self.lex_token()
        };
        // Trivia never updates lexer state (e.g. `/` regex disambiguation).
        if ![
            SyntaxKind::COMMENT,
            SyntaxKind::WHITESPACE,
            SyntaxKind::TEMPLATE_CHUNK,
        ]
        .contains(&token.0.kind)
        {
            self.state.update(token.0.kind);
        }
        Some(token)
    }
}
/// Handler category for each possible source byte, looked up through the
/// `DISPATCHER` table below and matched on in `lex_token`.
#[allow(non_camel_case_types)]
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
#[repr(u8)]
enum Dispatch {
    ERR, // byte that can never start a token
    WHS, // ascii whitespace
    EXL, // `!`
    QOT, // `"` or `'`
    IDT, // ascii ident char that cannot start a keyword (incl. `$`, `_`)
    HAS, // `#`
    PRC, // `%`
    AMP, // `&`
    PNO, // `(`
    PNC, // `)`
    MUL, // `*`
    PLS, // `+`
    COM, // `,`
    MIN, // `-`
    PRD, // `.`
    SLH, // `/`
    ZER, // `0`
    DIG, // `1`..=`9`
    COL, // `:`
    SEM, // `;`
    LSS, // `<`
    EQL, // `=`
    MOR, // `>`
    QST, // `?`
    BTO, // `[`
    BSL, // `\`
    BTC, // `]`
    CRT, // `^`
    TPL, // backtick
    // Lowercase letters that may begin a keyword; see `resolve_label`.
    L_A,
    L_B,
    L_C,
    L_D,
    L_E,
    L_F,
    L_I,
    L_N,
    L_R,
    L_S,
    L_T,
    L_V,
    L_W,
    L_Y,
    BEO, // `{`
    PIP, // `|`
    BEC, // `}`
    TLD, // `~`
    UNI, // non-ascii byte
}
use Dispatch::*;
// Lookup table mapping every possible source byte to its handler; indexed by
// the raw byte value, 16 entries per row.
static DISPATCHER: [Dispatch; 256] = [
    ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, WHS, WHS, WHS, WHS, WHS, ERR, ERR, // 0x00
    ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, // 0x10
    WHS, EXL, QOT, HAS, IDT, PRC, AMP, QOT, PNO, PNC, MUL, PLS, COM, MIN, PRD, SLH, // 0x20 ` ` ! " # $ % & ' ( ) * + , - . /
    ZER, DIG, DIG, DIG, DIG, DIG, DIG, DIG, DIG, DIG, COL, SEM, LSS, EQL, MOR, QST, // 0x30 0-9 : ; < = > ?
    ERR, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, // 0x40 @ A-O
    IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, BTO, BSL, BTC, CRT, IDT, // 0x50 P-Z [ \ ] ^ _
    TPL, L_A, L_B, L_C, L_D, L_E, L_F, IDT, IDT, L_I, IDT, IDT, IDT, IDT, L_N, IDT, // 0x60 ` a-o
    IDT, IDT, L_R, L_S, L_T, IDT, L_V, L_W, IDT, L_Y, IDT, BEO, PIP, BEC, TLD, ERR, // 0x70 p-z { | } ~
    UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // 0x80 (all non-ascii bytes)
    UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // 0x90
    UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // 0xA0
    UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // 0xB0
    UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // 0xC0
    UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // 0xD0
    UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // 0xE0
    UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // 0xF0
];