#[macro_use]
mod token;
mod highlight;
mod labels;
mod state;
mod tests;
pub use token::Token;
#[cfg(feature = "highlight")]
pub use highlight::*;
use codespan_reporting::diagnostic::{Diagnostic, Label};
use state::LexerState;
use unicode_xid::UnicodeXID;
pub use rslint_syntax::*;
/// A lexed token plus an optional diagnostic describing a problem that was
/// encountered while lexing it (the lexer is error-tolerant: it always
/// produces a token, even for malformed input).
pub type LexerReturn = (Token, Option<Diagnostic<usize>>);
/// An unrolled infinite loop: the body is emitted five times straight-line,
/// then again five times inside a `loop`. Used for the lexer's hot scanning
/// loops so short runs avoid branch-back overhead. The body MUST exit on its
/// own (via `return`/`break`) — the trailing `loop` never terminates itself.
macro_rules! unwind_loop {
    ($($iter:tt)*) => {
        $($iter)*
        $($iter)*
        $($iter)*
        $($iter)*
        $($iter)*
        loop {
            $($iter)*
            $($iter)*
            $($iter)*
            $($iter)*
            $($iter)*
        }
    };
}
/// First bytes of the UTF-8 encodings of the non-ASCII whitespace characters
/// the lexer recognises (see `UNICODE_SPACES` plus the U+2028/U+2029 line
/// separators). Used as a cheap pre-filter before decoding a full char.
const UNICODE_WHITESPACE_STARTS: [u8; 5] = [
    0xC2, // U+00A0 NO-BREAK SPACE
    0xEF, // presumably U+FEFF (BOM / zero-width no-break space) — TODO confirm
    0xE1, // U+1680 OGHAM SPACE MARK
    0xE2, // U+2000..=U+205F range, incl. U+2028/U+2029 line separators
    0xE3, // U+3000 IDEOGRAPHIC SPACE
];
/// Non-ASCII characters treated as plain (non-linebreak) whitespace.
/// Note U+2028/U+2029 are deliberately absent — they are line terminators
/// and are handled via `is_linebreak` instead.
const UNICODE_SPACES: [char; 16] = [
    '\u{00A0}', '\u{1680}', '\u{2000}', '\u{2001}', '\u{2002}', '\u{2003}', '\u{2004}', '\u{2005}',
    '\u{2006}', '\u{2007}', '\u{2008}', '\u{2009}', '\u{200A}', '\u{202F}', '\u{205F}', '\u{3000}',
];
/// A lookup-table driven, byte-oriented lexer.
///
/// `cur` is a byte offset into `bytes`; every token's length is computed as
/// a difference of byte offsets. The bytes are required to be valid UTF-8
/// (see `from_bytes`' safety contract).
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Lexer<'src> {
    // Raw source bytes; must be valid UTF-8.
    bytes: &'src [u8],
    // Current byte offset into `bytes`.
    cur: usize,
    // Lexer context: template nesting, whether a regex may start here,
    // whether a linebreak was crossed (see `state` module).
    state: LexerState,
    // File id attached to every diagnostic `Label`.
    pub file_id: usize,
    // Ensures the `Iterator` impl yields exactly one EOF token.
    returned_eof: bool,
}
impl<'src> Lexer<'src> {
/// Make a new lexer from raw bytes.
///
/// # Safety
/// `bytes` must be valid UTF-8: the lexer decodes characters with
/// `str::from_utf8_unchecked` (see `get_unicode_char`), so invalid UTF-8
/// is undefined behavior.
pub unsafe fn from_bytes(bytes: &'src [u8], file_id: usize) -> Self {
    Self {
        bytes,
        cur: 0,
        file_id,
        state: LexerState::new(),
        returned_eof: false,
    }
}
pub fn from_str(string: &'src str, file_id: usize) -> Self {
Self {
bytes: string.as_bytes(),
cur: 0,
file_id,
state: LexerState::new(),
returned_eof: false,
}
}
/// Skip a leading `#!...` shebang line, leaving the cursor on the
/// terminating linebreak (or at end of input if there is none).
/// Does nothing if the source has no shebang or lexing already started.
pub fn strip_shebang(&mut self) {
    if let Some(b"#!") = self.bytes.get(0..2) {
        // Only valid at the very start of the file.
        if self.cur != 0 {
            return;
        }
        self.next();
        while self.next().is_some() {
            let chr = self.get_unicode_char();
            // `next()` stepped one byte; skip the rest of a multibyte char.
            self.cur += chr.len_utf8() - 1;
            if is_linebreak(chr) {
                return;
            }
        }
    }
}
/// Advance the cursor one byte and hand back `tok` unchanged — a helper
/// for single-byte punctuator tokens.
fn eat(&mut self, tok: LexerReturn) -> LexerReturn {
    let _ = self.next();
    tok
}
/// Consume a run of ASCII and Unicode whitespace, setting
/// `state.had_linebreak` whenever a line terminator is crossed.
/// Returns with the cursor on the first non-whitespace byte.
fn consume_whitespace(&mut self) {
    unwind_loop! {
        if let Some(byte) = self.next().copied() {
            if DISPATCHER[byte as usize] != Dispatch::WHS {
                // Not ASCII whitespace — may still be a multibyte Unicode
                // space; bytes <= 0xC1 can never start one.
                if byte > 0xC1 && UNICODE_WHITESPACE_STARTS.contains(&byte) {
                    let chr = self.get_unicode_char();
                    if is_linebreak(chr) {
                        self.state.had_linebreak = true;
                    }
                    // U+2028/U+2029 are linebreaks but not in UNICODE_SPACES:
                    // we stop here and let `lex_token`'s UNI arm handle them.
                    if !UNICODE_SPACES.contains(&chr) {
                        return;
                    }
                    // `next()` already stepped one byte of the char.
                    self.cur += chr.len_utf8() - 1;
                } else {
                    return;
                }
            }
            if is_linebreak(byte as char) {
                self.state.had_linebreak = true;
            }
        } else {
            return;
        }
    }
}
/// Decode the full Unicode character starting at the current byte offset.
/// Requires `cur` to be in-bounds and on a UTF-8 character boundary.
fn get_unicode_char(&self) -> char {
    debug_assert!(self.cur < self.bytes.len());
    // SAFETY: the lexer invariant is that `bytes` is valid UTF-8 (see
    // `from_bytes`) and `cur` is kept on char boundaries.
    let string =
        unsafe { std::str::from_utf8_unchecked(&self.bytes.get_unchecked(self.cur..)) };
    let chr = if let Some(chr) = string.chars().next() {
        chr
    } else {
        // SAFETY: `cur < len` (asserted above), so the tail slice is
        // non-empty and therefore contains at least one char.
        unsafe {
            core::hint::unreachable_unchecked();
        }
    };
    chr
}
/// Pre-increment advance: moves the cursor one byte forward, then returns
/// the byte now under it (`None` once past the end).
#[inline]
fn next(&mut self) -> Option<&u8> {
    self.cur += 1;
    self.bytes.get(self.cur)
}
/// Like `next`, but the cursor never moves more than one byte past the end
/// of input, so `cur - start` token lengths stay correct at EOF.
#[inline]
fn next_bounded(&mut self) -> Option<&u8> {
    if let Some(b) = self.bytes.get(self.cur + 1) {
        self.cur += 1;
        Some(b)
    } else {
        // Clamp: step onto `len` exactly once, then stay put.
        if self.cur != self.bytes.len() {
            self.cur += 1;
        }
        None
    }
}
/// Move the cursor forward by `amount` bytes unconditionally.
fn advance(&mut self, amount: usize) {
    self.cur = self.cur + amount;
}
/// Look up a byte's dispatch class in the 256-entry table.
fn lookup(byte: u8) -> Dispatch {
    // SAFETY: DISPATCHER has exactly 256 entries, so any u8 index is in-bounds.
    unsafe { *DISPATCHER.get_unchecked(byte as usize) }
}
fn read_codepoint_escape(&mut self) -> Result<Option<char>, Diagnostic<usize>> {
let start = self.cur + 1;
self.read_hexnumber();
if self.bytes.get(self.cur) != Some(&b'}') {
let invalid = self.get_unicode_char();
let err = Diagnostic::error().with_message("Expected hex digits for a unicode code point escape, but encountered an invalid character")
.with_labels(vec![
Label::primary(self.file_id, self.cur..(invalid.len_utf8()))
]);
return Err(err);
}
let digits_str = unsafe {
debug_assert!(self.bytes.get(start..self.cur).is_some());
debug_assert!(std::str::from_utf8(self.bytes.get_unchecked(start..self.cur)).is_ok());
std::str::from_utf8_unchecked(self.bytes.get_unchecked(start..self.cur))
};
match u32::from_str_radix(digits_str, 16) {
Ok(digits) if digits <= 0x10FFFF => Ok(std::char::from_u32(digits)),
_ => {
let err = Diagnostic::error()
.with_message("Out of bounds codepoint for unicode codepoint escape sequence")
.with_labels(vec![Label::primary(self.file_id, start..self.cur)])
.with_notes(vec![
"Note: Codepoints range from 0 to 0x10FFFF (1114111)".to_string()
]);
Err(err)
}
}
}
/// Validate a `\uXXXX` escape with the cursor on the `u`, returning the
/// decoded character.
///
/// When `advance` is false the cursor is rewound to the `u` on both success
/// and failure, so the escape can be re-lexed (used by the identifier
/// start/part checks).
fn read_unicode_escape(&mut self, advance: bool) -> Result<char, Diagnostic<usize>> {
    debug_assert_eq!(self.bytes[self.cur], b'u');
    let diagnostic = Diagnostic::error()
        .with_message("Invalid digits after unicode escape sequence")
        .with_labels(vec![Label::primary(
            self.file_id,
            (self.cur - 1)..(self.cur + 1),
        )
        .with_message("Expected 4 hex digits following this")]);
    for idx in 0..4 {
        match self.next_bounded() {
            None => {
                // Rewind past the digits read so far plus the step that
                // hit the bad byte / EOF.
                if !advance {
                    self.cur -= idx + 1;
                }
                return Err(diagnostic);
            }
            Some(b) if !(*b as u8).is_ascii_hexdigit() => {
                if !advance {
                    self.cur -= idx + 1;
                }
                return Err(diagnostic);
            }
            _ => {}
        }
    }
    // SAFETY: the loop above verified that the last 4 bytes are ASCII hex
    // digits, so the slice is in-bounds, valid UTF-8, and parses as u32.
    // NOTE(review): values 0xD800..=0xDFFF (surrogates) reach
    // `from_u32_unchecked` here, which is UB for a `char` — confirm callers
    // can never feed a surrogate escape through this path.
    unsafe {
        let digits_str = std::str::from_utf8_unchecked(
            self.bytes.get_unchecked((self.cur - 3)..(self.cur + 1)),
        );
        if let Ok(digits) = u32::from_str_radix(digits_str, 16) {
            if !advance {
                self.cur -= 4;
            }
            Ok(std::char::from_u32_unchecked(digits))
        } else {
            core::hint::unreachable_unchecked();
        }
    }
}
/// Validate a `\xFF`-style hex escape with the cursor on the `x`,
/// consuming the two digits. Returns a diagnostic if either digit is
/// missing or not hex.
fn validate_hex_escape(&mut self) -> Option<Diagnostic<usize>> {
    debug_assert_eq!(self.bytes[self.cur], b'x');
    let diagnostic = Diagnostic::error()
        .with_message("Invalid digits after hex escape sequence")
        .with_labels(vec![Label::primary(
            self.file_id,
            (self.cur - 1)..(self.cur + 1),
        )
        .with_message("Expected 2 hex digits following this")]);
    for _ in 0..2 {
        match self.next_bounded() {
            None => return Some(diagnostic),
            Some(b) if !(*b as u8).is_ascii_hexdigit() => return Some(diagnostic),
            _ => {}
        }
    }
    None
}
/// Validate any escape sequence with the cursor on the backslash,
/// advancing over it. Used by string and template lexing; only malformed
/// `\u`/`\x` escapes (or a trailing lone backslash) yield a diagnostic —
/// every other `\c` escape is accepted as-is.
fn validate_escape_sequence(&mut self) -> Option<Diagnostic<usize>> {
    let cur = self.cur;
    if let Some(escape) = self.bytes.get(self.cur + 1) {
        match escape {
            // `\u{...}` codepoint escape.
            b'u' if self.bytes.get(self.cur + 2) == Some(&b'{') => {
                self.advance(2);
                self.read_codepoint_escape().err()
            }
            // `\uXXXX` escape.
            b'u' => {
                self.next();
                self.read_unicode_escape(true).err()
            }
            // `\xFF` escape.
            b'x' => {
                self.next();
                self.validate_hex_escape()
            }
            _ => {
                // Single-character escape: step over the backslash; the
                // caller's loop consumes the escaped character itself.
                let chr = self.get_unicode_char();
                self.cur += chr.len_utf8();
                None
            }
        }
    } else {
        // Backslash at the very end of the input.
        Some(Diagnostic::error().with_labels(vec![
            Label::primary(self.file_id, cur..(cur + 1)).with_message(
                "Expected an escape sequence following a backslash, but found none",
            ),
        ]))
    }
}
#[inline]
fn consume_ident(&mut self) {
unwind_loop! {
if self.next_bounded().is_some() {
if !self.cur_is_ident_part() {
return;
}
} else {
return;
}
}
}
/// Lex a string literal with the cursor on the opening quote (either `'`
/// or `"`). Returns a diagnostic for a bad escape (only the last one is
/// kept) or an unterminated literal.
fn read_str_literal(&mut self) -> Option<Diagnostic<usize>> {
    // SAFETY: caller dispatched on this byte, so it is in-bounds.
    let quote = unsafe { *self.bytes.get_unchecked(self.cur) };
    let start = self.cur;
    let mut diagnostic = None;
    while let Some(byte) = self.next_bounded() {
        match *byte {
            b'\\' => {
                diagnostic = self.validate_escape_sequence();
            }
            // Closing quote must match the opening one.
            b if b == quote => {
                self.next();
                return diagnostic;
            }
            _ => {}
        }
    }
    // Ran off the end of input without a closing quote.
    let unterminated = Diagnostic::error()
        .with_message("Unterminated string literal")
        .with_labels(vec![
            Label::primary(self.file_id, self.cur..self.cur).with_message("Input ends here"),
            Label::secondary(self.file_id, start..start + 1)
                .with_message("String literal starts here"),
        ]);
    Some(unterminated)
}
#[inline]
fn cur_is_ident_part(&mut self) -> bool {
debug_assert!(self.cur < self.bytes.len());
let b = unsafe { self.bytes.get_unchecked(self.cur) };
match Self::lookup(*b) {
IDT | DIG | ZER | L_A | L_B | L_C | L_D | L_E | L_F | L_I | L_N | L_R | L_S | L_T
| L_V | L_W | L_Y => true,
UNI => {
let res = self.get_unicode_char().is_xid_continue();
if res {
self.cur += self.get_unicode_char().len_utf8() - 1;
}
res
}
BSL if self.bytes.get(self.cur + 1) == Some(&b'u') => {
self.next();
if let Ok(c) = self.read_unicode_escape(false) {
if c.is_xid_continue() {
self.cur += c.len_utf8() - 1;
true
} else {
self.cur -= 1;
false
}
} else {
self.cur -= 1;
false
}
}
_ => false,
}
}
#[inline]
fn cur_is_ident_start(&mut self) -> bool {
debug_assert!(self.cur < self.bytes.len());
let b = unsafe { self.bytes.get_unchecked(self.cur) };
match Self::lookup(*b) {
BSL if self.bytes.get(self.cur + 1) == Some(&b'u') => {
self.next();
if let Ok(chr) = self.read_unicode_escape(false) {
if chr.is_xid_start() {
self.advance(5);
return true;
}
}
self.cur -= 1;
false
}
UNI => {
let chr = self.get_unicode_char();
if chr.is_xid_start() {
self.cur += chr.len_utf8() - 1;
true
} else {
false
}
}
IDT | L_A | L_B | L_C | L_D | L_E | L_F | L_I | L_N | L_R | L_S | L_T | L_V | L_W
| L_Y => true,
_ => false,
}
}
#[inline]
fn resolve_label(&mut self, label: Dispatch) -> LexerReturn {
let start = self.cur;
let kind = match label {
L_A => self.resolve_label_a(),
L_B => self.resolve_label_b(),
L_C => self.resolve_label_c(),
L_D => self.resolve_label_d(),
L_E => self.resolve_label_e(),
L_F => self.resolve_label_f(),
L_I => self.resolve_label_i(),
L_N => self.resolve_label_n(),
L_R => self.resolve_label_r(),
L_S => self.resolve_label_s(),
L_T => self.resolve_label_t(),
L_V => self.resolve_label_v(),
L_W => self.resolve_label_w(),
L_Y => self.resolve_label_y(),
_ => unsafe { core::hint::unreachable_unchecked() },
};
if let Some(syntax_kind) = kind {
if self.next_bounded().is_some() {
if self.cur_is_ident_part() {
self.consume_ident();
(Token::new(T![ident], self.cur - start), None)
} else {
(Token::new(syntax_kind, self.cur - start), None)
}
} else {
(Token::new(syntax_kind, self.cur - start), None)
}
} else {
self.consume_ident();
(Token::new(T![ident], self.cur - start), None)
}
}
#[inline]
fn special_number_start<F: Fn(char) -> bool>(&mut self, func: F) -> bool {
if self
.bytes
.get(self.cur + 2)
.map(|b| func(*b as char))
.unwrap_or(false)
{
self.cur += 1;
true
} else {
false
}
}
#[inline]
fn maybe_bigint(&mut self) {
if let Some(b'n') = self.bytes.get(self.cur) {
self.next();
}
}
#[inline]
fn read_zero(&mut self) {
match self.bytes.get(self.cur + 1) {
Some(b'x') | Some(b'X') => {
if self.special_number_start(|c| c.is_ascii_hexdigit()) {
self.read_hexnumber();
self.maybe_bigint();
} else {
self.next();
}
}
Some(b'b') | Some(b'B') => {
if self.special_number_start(|c| c == '0' || c == '1') {
self.read_bindigits();
self.maybe_bigint();
} else {
self.next();
}
}
Some(b'o') | Some(b'O') => {
if self.special_number_start(|c| ('0'..='7').contains(&c)) {
self.read_octaldigits();
self.maybe_bigint();
} else {
self.next();
}
}
Some(b'n') => {
self.cur += 2;
}
Some(b'.') => {
self.cur += 1;
self.read_float();
}
Some(b'e') | Some(b'E') => {
match self.bytes.get(self.cur + 2) {
Some(b'-') | Some(b'+') => {
if let Some(b'0'..=b'9') = self.bytes.get(self.cur + 3) {
self.next();
self.read_exponent();
}
}
Some(b'0'..=b'9') => self.read_exponent(),
_ => {
self.next();
}
}
}
_ => self.read_number(),
}
}
#[inline]
fn read_hexnumber(&mut self) {
unwind_loop! {
if let Some(b) = self.next_bounded() {
if !(*b as char).is_ascii_hexdigit() {
return;
}
} else {
return;
}
}
}
#[inline]
fn read_number(&mut self) {
unwind_loop! {
match self.next_bounded() {
Some(b'0'..=b'9') => {},
Some(b'.') => {
return self.read_float();
},
Some(b'e') | Some(b'E') => {
match self.bytes.get(self.cur + 1) {
Some(b'-') | Some(b'+') => {
if let Some(b'0'..=b'9') = self.bytes.get(self.cur + 2) {
self.next();
return self.read_exponent();
} else {
return;
}
},
Some(b'0'..=b'9') => return self.read_exponent(),
_ => return,
}
},
Some(b'n') => {
self.next();
return;
}
_ => return,
}
}
}
#[inline]
fn read_float(&mut self) {
unwind_loop! {
match self.next_bounded() {
Some(b'0'..=b'9') => {},
Some(b'e') | Some(b'E') => {
match self.bytes.get(self.cur + 1) {
Some(b'-') | Some(b'+') => {
if let Some(b'0'..=b'9') = self.bytes.get(self.cur + 2) {
self.next();
return self.read_exponent();
} else {
return;
}
},
Some(b'0'..=b'9') => return self.read_exponent(),
_ => return,
}
},
_ => return,
}
}
}
#[inline]
fn read_exponent(&mut self) {
if let Some(b'-') | Some(b'+') = self.bytes.get(self.cur + 1) {
self.next();
}
unwind_loop! {
if let Some(b'0'..=b'9') = self.next() {
} else {
return;
}
}
}
#[inline]
fn read_bindigits(&mut self) {
unwind_loop! {
if let Some(b'0') | Some(b'1') = self.next() {
} else {
return
}
}
}
#[inline]
fn read_octaldigits(&mut self) {
unwind_loop! {
if let Some(b'0'..=b'7') = self.next() {
} else {
return
}
}
}
#[inline]
fn verify_number_end(&mut self, start: usize) -> LexerReturn {
let err_start = self.cur;
if self.cur < self.bytes.len() && self.cur_is_ident_start() {
self.consume_ident();
let err = Diagnostic::error()
.with_message("Numbers cannot be followed by identifiers directly after")
.with_labels(vec![Label::primary(self.file_id, err_start..self.cur)
.with_message("An identifier cannot appear here")]);
(
Token::new(SyntaxKind::ERROR_TOKEN, self.cur - start),
Some(err),
)
} else {
tok!(NUMBER, self.cur - start)
}
}
#[inline]
fn read_slash(&mut self) -> LexerReturn {
let start = self.cur;
match self.bytes.get(self.cur + 1) {
Some(b'*') => {
self.next();
while let Some(b) = self.next().copied() {
match b {
b'*' if self.bytes.get(self.cur + 1) == Some(&b'/') => {
self.advance(2);
return tok!(COMMENT, self.cur - start);
}
_ => {}
}
}
let err = Diagnostic::error()
.with_message("Unterminated block comment")
.with_labels(vec![
Label::primary(self.file_id, self.cur..self.cur + 1)
.with_message("... but the file ends here"),
Label::secondary(self.file_id, start..start + 2)
.with_message("A block comment starts here..."),
]);
(Token::new(SyntaxKind::COMMENT, self.cur - start), Some(err))
}
Some(b'/') => {
self.next();
while self.next().is_some() {
let chr = self.get_unicode_char();
if is_linebreak(chr) {
return tok!(COMMENT, self.cur - start);
}
self.cur += chr.len_utf8() - 1;
}
tok!(COMMENT, self.cur - start)
}
_ if self.state.expr_allowed => self.read_regex(),
_ => self.eat(tok![/]),
}
}
#[inline]
fn flag_err(&self, flag: char) -> Diagnostic<usize> {
Diagnostic::error()
.with_message(&format!("Duplicate flag `{}`", flag))
.with_labels(vec![Label::primary(self.file_id, self.cur..self.cur + 1)
.with_message("This flag was already used")])
}
#[inline]
#[allow(clippy::many_single_char_names)]
fn read_regex(&mut self) -> LexerReturn {
let start = self.cur;
let mut in_class = false;
let mut diagnostic = None;
unwind_loop! {
match self.next() {
Some(b'[') => in_class = true,
Some(b']') => in_class = false,
Some(b'/') => {
if !in_class {
let (mut g, mut i, mut m, mut s, mut u, mut y) = (false, false, false, false, false, false);
unwind_loop! {
let next = self.next_bounded().copied();
match next {
Some(b'g') => {
if g && diagnostic.is_none() {
diagnostic = Some(self.flag_err('g'))
}
g = true;
},
Some(b'i') => {
if i && diagnostic.is_none() {
diagnostic = Some(self.flag_err('i'))
}
i = true;
},
Some(b'm') => {
if m && diagnostic.is_none() {
diagnostic = Some(self.flag_err('m'))
}
m = true;
},
Some(b's') => {
if s && diagnostic.is_none() {
diagnostic = Some(self.flag_err('s'))
}
s = true;
},
Some(b'u') => {
if u && diagnostic.is_none() {
diagnostic = Some(self.flag_err('u'))
}
u = true;
},
Some(b'y') => {
if y && diagnostic.is_none() {
diagnostic = Some(self.flag_err('y'))
}
y = true;
},
Some(_) if self.cur_is_ident_part() => {
let chr_start = self.cur;
self.cur += self.get_unicode_char().len_utf8() - 1;
if diagnostic.is_none() {
diagnostic = Some(Diagnostic::error()
.with_message("Invalid regex flag")
.with_labels(vec![Label::primary(self.file_id, chr_start..self.cur + 1)
.with_message("This is not a valid regex flag")]));
}
},
_ => {
return (Token::new(SyntaxKind::REGEX, self.cur - start), diagnostic)
}
}
}
}
},
Some(b'\\') => {
if self.next_bounded().is_none() {
let err = Diagnostic::error().with_message("Expected a character after a regex escape, but found none")
.with_labels(vec![
Label::primary(self.file_id, self.cur..self.cur + 1).with_message("Expected a character following this")
]);
return (Token::new(SyntaxKind::REGEX, self.cur - start), Some(err));
}
},
None => {
let err = Diagnostic::error().with_message("Unterminated regex literal")
.with_labels(vec![
Label::primary(self.file_id, self.cur..self.cur).with_message("...but the file ends here"),
Label::secondary(self.file_id, start..start + 1).with_message("a regex literal starts here...")
]);
return (Token::new(SyntaxKind::REGEX, self.cur - start), Some(err));
},
_ => {},
}
}
}
#[inline]
fn bin_or_assign(&mut self, bin: SyntaxKind, assign: SyntaxKind) -> LexerReturn {
if let Some(b'=') = self.next() {
self.next();
(Token::new(assign, 2), None)
} else {
(Token::new(bin, 1), None)
}
}
#[inline]
fn resolve_bang(&mut self) -> LexerReturn {
match self.next() {
Some(b'=') => {
if let Some(b'=') = self.next() {
self.next();
tok!(NEQ2, 3)
} else {
tok!(NEQ, 2)
}
}
_ => tok!(!),
}
}
#[inline]
fn resolve_amp(&mut self) -> LexerReturn {
match self.next() {
Some(b'&') => {
if let Some(b'=') = self.next() {
self.next();
tok!(AMP2EQ, 3)
} else {
tok!(AMP2, 2)
}
}
Some(b'=') => {
self.next();
tok!(AMPEQ, 2)
}
_ => tok!(&),
}
}
#[inline]
fn resolve_plus(&mut self) -> LexerReturn {
match self.next() {
Some(b'+') => {
self.next();
tok!(PLUS2, 2)
}
Some(b'=') => {
self.next();
tok!(PLUSEQ, 2)
}
_ => tok!(+),
}
}
#[inline]
fn resolve_minus(&mut self) -> LexerReturn {
match self.next() {
Some(b'-') => {
self.next();
tok!(MINUS2, 2)
}
Some(b'=') => {
self.next();
tok!(MINUSEQ, 2)
}
_ => tok!(-),
}
}
#[inline]
fn resolve_less_than(&mut self) -> LexerReturn {
match self.next() {
Some(b'<') => {
if let Some(b'=') = self.next() {
self.next();
tok!(SHLEQ, 3)
} else {
tok!(SHL, 2)
}
}
Some(b'=') => {
self.next();
tok!(LTEQ, 2)
}
_ => tok!(<),
}
}
#[inline]
fn resolve_greater_than(&mut self) -> LexerReturn {
match self.next() {
Some(b'>') => {
if let Some(b'>') = self.next() {
if let Some(b'=') = self.next() {
self.next();
tok!(USHREQ, 4)
} else {
tok!(USHR, 3)
}
} else {
tok!(SHR, 2)
}
}
Some(b'=') => {
self.next();
tok!(GTEQ, 2)
}
_ => tok!(>),
}
}
#[inline]
fn resolve_eq(&mut self) -> LexerReturn {
match self.next() {
Some(b'=') => {
if let Some(b'=') = self.next() {
self.next();
tok!(EQ3, 3)
} else {
tok!(EQ2, 2)
}
}
Some(b'>') => {
self.next();
tok!(FAT_ARROW, 2)
}
_ => tok!(=),
}
}
#[inline]
fn resolve_pipe(&mut self) -> LexerReturn {
match self.next() {
Some(b'|') => {
if let Some(b'=') = self.next() {
self.next();
tok!(PIPE2EQ, 3)
} else {
tok!(PIPE2, 2)
}
}
Some(b'=') => {
self.next();
tok!(PIPEEQ, 2)
}
_ => tok!(|),
}
}
#[inline]
fn resolve_question(&mut self) -> LexerReturn {
match self.next() {
Some(b'?') => {
if let Some(b'=') = self.next() {
self.next();
tok!(QUESTION2EQ, 3)
} else {
tok!(QUESTION2, 2)
}
}
Some(b'.') => {
if let Some(b'0'..=b'9') = self.bytes.get(self.cur + 1) {
tok!(?)
} else {
self.next();
tok!(QUESTIONDOT, 2)
}
}
_ => tok!(?),
}
}
#[inline]
fn resolve_star(&mut self) -> LexerReturn {
match self.next() {
Some(b'*') => {
if let Some(b'=') = self.next() {
self.next();
tok!(STAR2EQ, 3)
} else {
tok!(STAR2, 2)
}
}
Some(b'=') => {
self.next();
tok!(STAREQ, 2)
}
_ => tok!(*),
}
}
/// Lex the next token, dispatching on the class of the byte under the
/// cursor. The cursor must be in-bounds (guaranteed by the `Iterator` impl).
fn lex_token(&mut self) -> LexerReturn {
    // SAFETY: the caller (`Iterator::next`) checks `cur < bytes.len()`.
    let byte = unsafe { *self.bytes.get_unchecked(self.cur) };
    let start = self.cur;
    let dispatched = Self::lookup(byte);
    match dispatched {
        WHS => {
            self.consume_whitespace();
            tok!(WHITESPACE, self.cur - start)
        }
        EXL => self.resolve_bang(),
        PRC => self.bin_or_assign(T![%], T![%=]),
        AMP => self.resolve_amp(),
        PNO => self.eat(tok!(L_PAREN, 1)),
        PNC => self.eat(tok!(R_PAREN, 1)),
        MUL => self.resolve_star(),
        PLS => self.resolve_plus(),
        COM => self.eat(tok![,]),
        MIN => self.resolve_minus(),
        SLH => self.read_slash(),
        TPL => self.eat(tok!(BACKTICK, 1)),
        ZER => {
            self.read_zero();
            self.verify_number_end(start)
        }
        PRD => {
            // `...` spread/rest operator.
            if let Some(b"..") = self.bytes.get(self.cur + 1..self.cur + 3) {
                self.cur += 3;
                return tok!(DOT2, 3);
            }
            // `.5`-style float literal.
            if let Some(b'0'..=b'9') = self.bytes.get(self.cur + 1) {
                self.read_float();
                self.verify_number_end(start)
            } else {
                self.eat(tok![.])
            }
        }
        BSL => {
            if self.bytes.get(self.cur + 1) == Some(&b'u') {
                // Identifier beginning with a `\uXXXX` escape.
                self.next();
                match self.read_unicode_escape(true) {
                    Ok(chr) => {
                        if chr.is_xid_start() {
                            self.consume_ident();
                            tok!(IDENT, self.cur - start)
                        } else {
                            // Fix: the explanation belongs on the Label.
                            // Previously a second `.with_message` was chained
                            // onto the Diagnostic, silently overwriting the
                            // primary "Unexpected unicode escape" message and
                            // leaving the label unexplained.
                            let err = Diagnostic::error()
                                .with_message("Unexpected unicode escape")
                                .with_labels(vec![Label::primary(self.file_id, start..self.cur)
                                    .with_message("This escape is unexpected, as it does not designate the start of an identifier")]);
                            self.next();
                            (
                                Token::new(SyntaxKind::ERROR_TOKEN, self.cur - start),
                                Some(err),
                            )
                        }
                    }
                    Err(err) => (
                        Token::new(SyntaxKind::ERROR_TOKEN, self.cur - start),
                        Some(err),
                    ),
                }
            } else {
                // Lone backslash — never valid outside strings/regexes.
                let err = Diagnostic::error()
                    .with_message(&format!("Unexpected token `{}`", byte as char))
                    .with_labels(vec![Label::primary(self.file_id, start..self.cur + 1)]);
                self.next();
                (Token::new(SyntaxKind::ERROR_TOKEN, 1), Some(err))
            }
        }
        QOT => {
            if let Some(err) = self.read_str_literal() {
                (
                    Token::new(SyntaxKind::ERROR_TOKEN, self.cur - start),
                    Some(err),
                )
            } else {
                tok!(STRING, self.cur - start)
            }
        }
        IDT => {
            self.consume_ident();
            tok!(IDENT, self.cur - start)
        }
        DIG => {
            self.read_number();
            self.verify_number_end(start)
        }
        COL => self.eat(tok![:]),
        SEM => self.eat(tok![;]),
        LSS => self.resolve_less_than(),
        EQL => self.resolve_eq(),
        MOR => self.resolve_greater_than(),
        QST => self.resolve_question(),
        BTO => self.eat(tok!(L_BRACK, 1)),
        BTC => self.eat(tok![R_BRACK, 1]),
        CRT => self.bin_or_assign(T![^], T![^=]),
        BEO => self.eat(tok![L_CURLY, 1]),
        BEC => self.eat(tok![R_CURLY, 1]),
        PIP => self.resolve_pipe(),
        TLD => self.eat(tok![~]),
        // A lowercase letter that may begin a keyword.
        L_A | L_B | L_C | L_D | L_E | L_F | L_I | L_N | L_R | L_S | L_T | L_V | L_W | L_Y => {
            self.resolve_label(dispatched)
        }
        UNI => {
            // Non-ASCII byte: Unicode whitespace or an error. (Multibyte
            // identifier starts are handled via `cur_is_ident_part` paths;
            // NOTE(review): a bare non-ASCII XID_Start char appears to fall
            // into the error branch here — confirm intended.)
            if UNICODE_WHITESPACE_STARTS.contains(&byte) {
                let chr = self.get_unicode_char();
                if is_linebreak(chr) {
                    self.state.had_linebreak = true;
                }
                self.cur += self.get_unicode_char().len_utf8() - 1;
                self.consume_whitespace();
                tok!(WHITESPACE, self.cur - start)
            } else {
                let chr = self.get_unicode_char();
                self.cur += chr.len_utf8() - 1;
                // `chr` is already a `char`; the old `chr as char` cast was
                // redundant.
                let err = Diagnostic::error()
                    .with_message(&format!("Unexpected token `{}`", chr))
                    .with_labels(vec![Label::primary(self.file_id, start..self.cur + 1)]);
                self.next();
                (
                    Token::new(SyntaxKind::ERROR_TOKEN, self.cur - start),
                    Some(err),
                )
            }
        }
        // ERR and anything else: emit a one-byte error token and move on.
        _ => {
            let err = Diagnostic::error()
                .with_message(&format!("Unexpected token `{}`", byte as char))
                .with_labels(vec![Label::primary(self.file_id, start..self.cur + 1)]);
            self.next();
            (Token::new(SyntaxKind::ERROR_TOKEN, 1), Some(err))
        }
    }
}
/// Lex the next piece of a template literal: the closing backtick, a
/// `${` interpolation opener, or a TEMPLATE_CHUNK of raw text (which stops
/// just before a `` ` `` or `${` so those are lexed as their own tokens on
/// the next call). Bad escapes and an unterminated literal produce
/// diagnostics.
fn lex_template(&mut self) -> LexerReturn {
    let start = self.cur;
    let mut diagnostic = None;
    while let Some(b) = self.bytes.get(self.cur) {
        match *b as char {
            // Backtick at chunk start: it is its own token.
            '`' if self.cur == start => {
                self.next();
                return tok!(BACKTICK, 1);
            }
            // Backtick after some text: end the chunk, leave the backtick.
            '`' => {
                return (
                    Token::new(SyntaxKind::TEMPLATE_CHUNK, self.cur - start),
                    diagnostic,
                );
            }
            '\\' => {
                // Only the last bad escape's diagnostic is kept.
                if let Some(err) = self.validate_escape_sequence() {
                    diagnostic = Some(err);
                }
                self.next_bounded();
            }
            // `${` at chunk start: emit the interpolation opener.
            '$' if self.bytes.get(self.cur + 1) == Some(&b'{') && self.cur == start => {
                self.advance(2);
                return (Token::new(SyntaxKind::DOLLARCURLY, 2), diagnostic);
            }
            // `${` after some text: end the chunk, leave `${` for next call.
            '$' if self.bytes.get(self.cur + 1) == Some(&b'{') => {
                return (
                    Token::new(SyntaxKind::TEMPLATE_CHUNK, self.cur - start),
                    diagnostic,
                )
            }
            _ => {
                let _ = self.next();
            }
        }
    }
    // EOF inside the template literal.
    let err = Diagnostic::error()
        .with_message("Unterminated template literal")
        .with_labels(vec![Label::primary(self.file_id, self.cur..self.cur + 1)]);
    (
        Token::new(SyntaxKind::TEMPLATE_CHUNK, self.cur - start),
        Some(err),
    )
}
}
/// Check whether `chr` is one of the four ECMAScript line terminators:
/// LF, CR, U+2028 LINE SEPARATOR, or U+2029 PARAGRAPH SEPARATOR.
pub fn is_linebreak(chr: char) -> bool {
    matches!(chr, '\n' | '\r' | '\u{2028}' | '\u{2029}')
}
impl Iterator for Lexer<'_> {
    type Item = LexerReturn;
    /// Produce the next token. Yields exactly one zero-length EOF token at
    /// the end of input, then `None` forever. Trivia (comments, whitespace,
    /// template chunks) does not update the lexer state, since the state
    /// tracks the last *meaningful* token (e.g. for regex disambiguation).
    fn next(&mut self) -> Option<Self::Item> {
        if self.cur >= self.bytes.len() {
            if !self.returned_eof {
                self.returned_eof = true;
                return Some(tok!(EOF, 0));
            }
            return None;
        }
        // Inside a template literal the lexing rules are entirely different.
        let token = if self.state.is_in_template() {
            self.lex_template()
        } else {
            self.lex_token()
        };
        if ![
            SyntaxKind::COMMENT,
            SyntaxKind::WHITESPACE,
            SyntaxKind::TEMPLATE_CHUNK,
        ]
        .contains(&token.0.kind)
        {
            self.state.update(token.0.kind);
        }
        Some(token)
    }
}
/// Dispatch class of a byte, looked up through `DISPATCHER`:
/// `ERR` = illegal byte, `WHS` = ASCII whitespace, `UNI` = any byte >= 0x80
/// (start/continuation of a multibyte UTF-8 character), `IDT` = identifier
/// character, `ZER`/`DIG` = `0` / `1`-`9`, `L_*` = a lowercase letter that
/// may begin a keyword (handled by `resolve_label`); the remaining variants
/// are the individual punctuator bytes (`EXL` = `!`, `QOT` = quote, etc.).
#[allow(non_camel_case_types)]
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
#[repr(u8)]
enum Dispatch {
    ERR,
    WHS,
    EXL,
    QOT,
    IDT,
    PRC,
    AMP,
    PNO,
    PNC,
    MUL,
    PLS,
    COM,
    MIN,
    PRD,
    SLH,
    ZER,
    DIG,
    COL,
    SEM,
    LSS,
    EQL,
    MOR,
    QST,
    BTO,
    BSL,
    BTC,
    CRT,
    TPL,
    L_A,
    L_B,
    L_C,
    L_D,
    L_E,
    L_F,
    L_I,
    L_N,
    L_R,
    L_S,
    L_T,
    L_V,
    L_W,
    L_Y,
    BEO,
    PIP,
    BEC,
    TLD,
    UNI,
}
use Dispatch::*;
// Maps every possible byte (0x00-0xFF) to its `Dispatch` class. Each row
// covers 16 bytes: row N is 0xN0..=0xNF. All bytes >= 0x80 map to `UNI`.
static DISPATCHER: [Dispatch; 256] = [
    ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, WHS, WHS, WHS, WHS, WHS, ERR, ERR, // 0x00: TAB/LF/VT/FF/CR are whitespace
    ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, // 0x10
    WHS, EXL, QOT, ERR, IDT, PRC, AMP, QOT, PNO, PNC, MUL, PLS, COM, MIN, PRD, SLH, // 0x20: ` !"#$%&'()*+,-./` — `$` is an ident char
    ZER, DIG, DIG, DIG, DIG, DIG, DIG, DIG, DIG, DIG, COL, SEM, LSS, EQL, MOR, QST, // 0x30: `0-9:;<=>?`
    ERR, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, // 0x40: `@A-O`
    IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, BTO, BSL, BTC, CRT, IDT, // 0x50: `P-Z[\]^_` — `_` is an ident char
    TPL, L_A, L_B, L_C, L_D, L_E, L_F, IDT, IDT, L_I, IDT, IDT, IDT, IDT, L_N, IDT, // 0x60: backtick + `a-o`; keyword-start letters get L_*
    IDT, IDT, L_R, L_S, L_T, IDT, L_V, L_W, IDT, L_Y, IDT, BEO, PIP, BEC, TLD, ERR, // 0x70: `p-z{|}~` + DEL
    UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // 0x80+: non-ASCII
    UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI,
    UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI,
    UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI,
    UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI,
    UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI,
    UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI,
    UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI,
];