use crate::error::{StatorError, StatorResult};
/// A location in the source text, as captured by `Scanner::current_pos`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct Position {
/// Byte offset into the source string (the scanner advances it by each
/// character's UTF-8 length).
pub offset: usize,
/// Line number; the scanner starts at 1 and increments on every line terminator.
pub line: u32,
/// Column number counted in characters; the scanner resets it to 1 after a
/// line terminator.
pub column: u32,
}
/// The source range covered by a token.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Span {
/// Position of the token's first character.
pub start: Position,
/// Scanner position once the token has been consumed, i.e. just past its
/// last character.
pub end: Position,
}
/// Every kind of token the scanner can produce.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TokenKind {
// --- Literals ---
NumericLiteral,
StringLiteral,
// Template pieces: a template without `${` is a single
// `NoSubstitutionTemplate`; otherwise it is split into head / middle / tail.
NoSubstitutionTemplate,
TemplateHead,
TemplateMiddle,
TemplateTail,
RegExpLiteral,
// --- Names ---
Identifier,
// `#name`
PrivateIdentifier,
// --- Keywords recognised by `keyword_kind` ---
Await,
Break,
Case,
Catch,
Class,
Const,
Continue,
Debugger,
Default,
Delete,
Do,
Else,
Enum,
Export,
Extends,
False,
Finally,
For,
Function,
If,
Import,
In,
Instanceof,
Let,
New,
Null,
Of,
Return,
Static,
Super,
Switch,
This,
Throw,
True,
Try,
Typeof,
Var,
Void,
While,
With,
Yield,
// Contextual keywords (also produced by `keyword_kind`).
Async,
From,
As,
Get,
Set,
Target,
Meta,
Using,
// --- Punctuators ---
LeftBrace,
RightBrace,
LeftParen,
RightParen,
LeftBracket,
RightBracket,
Dot,
DotDotDot,
Semicolon,
Comma,
// Comparison
Less,
Greater,
LessEqual,
GreaterEqual,
EqualEqual,
BangEqual,
EqualEqualEqual,
BangEqualEqual,
// Arithmetic
Plus,
Minus,
Star,
StarStar,
Slash,
Percent,
PlusPlus,
MinusMinus,
// Shifts and bitwise
LessLess,
GreaterGreater,
GreaterGreaterGreater,
Ampersand,
Pipe,
Caret,
Bang,
Tilde,
// Logical / conditional
AmpersandAmpersand,
PipePipe,
QuestionQuestion,
Question,
Colon,
// Assignment
Equal,
PlusEqual,
MinusEqual,
StarEqual,
StarStarEqual,
SlashEqual,
PercentEqual,
LessLessEqual,
GreaterGreaterEqual,
GreaterGreaterGreaterEqual,
AmpersandEqual,
PipeEqual,
CaretEqual,
AmpersandAmpersandEqual,
PipePipeEqual,
QuestionQuestionEqual,
// `=>`
Arrow,
// `?.`
QuestionDot,
// --- Comments (returned as tokens by `Scanner::next_token`) ---
SingleLineComment,
MultiLineComment,
// `#!...` line, only recognised at offset 0 of the source.
HashbangComment,
// --- End of input ---
Eof,
}
/// Payload attached to a token.
#[derive(Debug, Clone, PartialEq)]
pub enum TokenValue {
/// No payload (keywords, punctuators, end of input).
None,
/// Text payload: identifier names, the raw source text of string and
/// regular-expression literals, the raw body of template pieces, and the
/// text of comments.
Str(String),
/// Value of a numeric literal that is not a BigInt.
Number(f64),
/// Source text of a BigInt literal with the trailing `n` and any `_`
/// separators removed (a radix prefix such as `0x` is kept).
BigInt(String),
}
/// A single lexical token produced by `Scanner::next_token`.
#[derive(Debug, Clone, PartialEq)]
pub struct Token {
/// What kind of token this is.
pub kind: TokenKind,
/// Payload for kinds that carry one; `TokenValue::None` otherwise.
pub value: TokenValue,
/// Source range the token was scanned from.
pub span: Span,
/// `true` when the whitespace skipped immediately before this token contained
/// a line terminator. For multi-line comments it is also `true` when the
/// comment itself contains one.
pub had_line_terminator_before: bool,
}
/// Returns `true` for LF, CR, U+2028 (line separator) and U+2029 (paragraph separator).
fn is_line_terminator(c: char) -> bool {
    const LINE_TERMINATORS: [char; 4] = ['\n', '\r', '\u{2028}', '\u{2029}'];
    LINE_TERMINATORS.contains(&c)
}
/// Returns `true` for every character the scanner skips between tokens:
/// the blank characters listed below plus the four line terminators.
fn is_js_whitespace(c: char) -> bool {
    match c {
        // Line terminators count as skippable whitespace too.
        '\n' | '\r' | '\u{2028}' | '\u{2029}' => true,
        // ASCII blanks, no-break space and the byte-order mark.
        '\t' | '\x0B' | '\x0C' | ' ' | '\u{00A0}' | '\u{FEFF}' => true,
        // Individual Unicode space characters.
        '\u{1680}' | '\u{202F}' | '\u{205F}' | '\u{3000}' => true,
        // The contiguous U+2000..=U+200A block of spaces.
        other => ('\u{2000}'..='\u{200A}').contains(&other),
    }
}
/// Returns `true` when `c` may begin an identifier: `$`, `_`, or any
/// character for which `char::is_alphabetic` holds.
fn is_id_start(c: char) -> bool {
    matches!(c, '$' | '_') || c.is_alphabetic()
}
/// Returns `true` when `c` may appear after the first character of an
/// identifier: `$`, `_`, ZWNJ (U+200C), ZWJ (U+200D), or any character for
/// which `char::is_alphanumeric` holds.
fn is_id_continue(c: char) -> bool {
    match c {
        '$' | '_' | '\u{200C}' | '\u{200D}' => true,
        _ => c.is_alphanumeric(),
    }
}
fn keyword_kind(s: &str) -> Option<TokenKind> {
match s {
"await" => Some(TokenKind::Await),
"break" => Some(TokenKind::Break),
"case" => Some(TokenKind::Case),
"catch" => Some(TokenKind::Catch),
"class" => Some(TokenKind::Class),
"const" => Some(TokenKind::Const),
"continue" => Some(TokenKind::Continue),
"debugger" => Some(TokenKind::Debugger),
"default" => Some(TokenKind::Default),
"delete" => Some(TokenKind::Delete),
"do" => Some(TokenKind::Do),
"else" => Some(TokenKind::Else),
"enum" => Some(TokenKind::Enum),
"export" => Some(TokenKind::Export),
"extends" => Some(TokenKind::Extends),
"false" => Some(TokenKind::False),
"finally" => Some(TokenKind::Finally),
"for" => Some(TokenKind::For),
"function" => Some(TokenKind::Function),
"if" => Some(TokenKind::If),
"import" => Some(TokenKind::Import),
"in" => Some(TokenKind::In),
"instanceof" => Some(TokenKind::Instanceof),
"let" => Some(TokenKind::Let),
"new" => Some(TokenKind::New),
"null" => Some(TokenKind::Null),
"of" => Some(TokenKind::Of),
"return" => Some(TokenKind::Return),
"static" => Some(TokenKind::Static),
"super" => Some(TokenKind::Super),
"switch" => Some(TokenKind::Switch),
"this" => Some(TokenKind::This),
"throw" => Some(TokenKind::Throw),
"true" => Some(TokenKind::True),
"try" => Some(TokenKind::Try),
"typeof" => Some(TokenKind::Typeof),
"var" => Some(TokenKind::Var),
"void" => Some(TokenKind::Void),
"while" => Some(TokenKind::While),
"with" => Some(TokenKind::With),
"yield" => Some(TokenKind::Yield),
"async" => Some(TokenKind::Async),
"from" => Some(TokenKind::From),
"as" => Some(TokenKind::As),
"get" => Some(TokenKind::Get),
"set" => Some(TokenKind::Set),
"target" => Some(TokenKind::Target),
"meta" => Some(TokenKind::Meta),
"using" => Some(TokenKind::Using),
_ => None,
}
}
/// Decides whether a `/` should start a regular-expression literal, based on
/// the kind of the previous significant token.
///
/// Returns `true` (regexp) at the start of input and after any token that
/// cannot end an expression. Returns `false` (division) after tokens that end
/// an operand: names, literals, closing `)` / `]`, postfix `++` / `--` and the
/// value keywords.
///
/// The contextual keywords `async`, `from`, `as`, `get`, `set`, `target`,
/// `meta` and `using` are treated like identifiers here: they are valid
/// identifier names (`obj.get / 2`), `new.target` and `import.meta` are
/// complete expressions, and none of their keyword uses is followed by a
/// regular-expression literal.
fn slash_is_regexp(last: Option<&TokenKind>) -> bool {
    let Some(kind) = last else {
        // Nothing scanned yet: a leading `/` can only open a regexp.
        return true;
    };
    !matches!(
        kind,
        TokenKind::Identifier
            | TokenKind::PrivateIdentifier
            | TokenKind::NumericLiteral
            | TokenKind::StringLiteral
            | TokenKind::NoSubstitutionTemplate
            | TokenKind::TemplateTail
            | TokenKind::RegExpLiteral
            | TokenKind::RightParen
            | TokenKind::RightBracket
            | TokenKind::PlusPlus
            | TokenKind::MinusMinus
            | TokenKind::True
            | TokenKind::False
            | TokenKind::Null
            | TokenKind::This
            | TokenKind::Super
            // Contextual keywords behave as identifiers in operand position.
            | TokenKind::Async
            | TokenKind::From
            | TokenKind::As
            | TokenKind::Get
            | TokenKind::Set
            | TokenKind::Target
            | TokenKind::Meta
            | TokenKind::Using
    )
}
/// Tokenizer that walks a source string and yields one [`Token`] per call to
/// `next_token`.
#[derive(Clone)]
pub struct Scanner<'src> {
/// The complete source text being scanned.
source: &'src str,
/// Byte offset of the next unread character in `source`.
pos: usize,
/// Line of the next unread character (starts at 1).
line: u32,
/// Column of the next unread character (starts at 1).
column: u32,
/// For each template substitution (`${`) currently open, the value of
/// `brace_depth` at the moment it was opened; a `}` seen at that same depth
/// resumes the template instead of producing `RightBrace`.
template_stack: Vec<usize>,
/// Number of `{` tokens seen that have not yet been matched by a `}` token.
brace_depth: usize,
/// Kind of the most recent non-comment token; consulted to decide whether a
/// `/` starts a regular expression or is a division operator.
last_significant_kind: Option<TokenKind>,
}
impl<'src> Scanner<'src> {
/// Creates a scanner positioned at the first character of `source`
/// (offset 0, line 1, column 1) with no open braces or templates.
pub fn new(source: &'src str) -> Self {
    let (line, column) = (1, 1);
    Self {
        source,
        pos: 0,
        line,
        column,
        brace_depth: 0,
        template_stack: Vec::new(),
        last_significant_kind: None,
    }
}
pub fn source(&self) -> &'src str {
self.source
}
/// Returns `true` once the cursor has reached the end of the source.
pub fn is_eof(&self) -> bool {
    let total_len = self.source.len();
    total_len <= self.pos
}
/// Returns the next unread character without consuming it, or `None` at end of input.
fn peek(&self) -> Option<char> {
    let remaining = &self.source[self.pos..];
    remaining.chars().next()
}
/// Returns the character after the next one without consuming anything,
/// or `None` when fewer than two characters remain.
fn peek2(&self) -> Option<char> {
    self.source[self.pos..].chars().nth(1)
}
/// Consumes and returns the next character, updating offset, line and column.
///
/// A CR immediately followed by LF is consumed as a single line break (the
/// returned character is the CR).
///
/// # Panics
///
/// Panics when called at end of input.
fn advance(&mut self) -> char {
    let remaining = &self.source[self.pos..];
    let ch = remaining
        .chars()
        .next()
        .expect("advance called past end of input");
    self.pos += ch.len_utf8();

    // Swallow the LF of a CRLF pair so it counts as one line break.
    if ch == '\r' && self.source[self.pos..].starts_with('\n') {
        self.pos += 1;
    }

    if matches!(ch, '\r' | '\n' | '\u{2028}' | '\u{2029}') {
        self.line += 1;
        self.column = 1;
    } else {
        self.column += 1;
    }
    ch
}
fn current_pos(&self) -> Position {
Position {
offset: self.pos,
line: self.line,
column: self.column,
}
}
/// Consumes whitespace (including line terminators) up to the next
/// significant character and reports whether any line terminator was skipped.
fn skip_whitespace(&mut self) -> bool {
    let mut saw_line_terminator = false;
    loop {
        match self.peek() {
            Some(c) if is_js_whitespace(c) => {
                saw_line_terminator |= is_line_terminator(c);
                self.advance();
            }
            _ => return saw_line_terminator,
        }
    }
}
/// Consumes a run of digits (as defined by `is_digit`) with optional `_`
/// separators.
///
/// * `saw_digit` – whether a digit of this run was already consumed by the caller.
/// * `require_digit` – whether an empty run is an error.
/// * `kind` – literal family name used in error messages.
///
/// # Errors
///
/// Returns a syntax error when a separator is leading, doubled or trailing,
/// or when `require_digit` is set and no digit was seen.
fn scan_digit_sequence(
    &mut self,
    mut saw_digit: bool,
    require_digit: bool,
    is_digit: impl Fn(char) -> bool,
    kind: &'static str,
) -> StatorResult<()> {
    let separator_error =
        || StatorError::SyntaxError(format!("invalid numeric separator in {kind} literal"));

    let mut after_separator = false;
    loop {
        match self.peek() {
            Some(c) if is_digit(c) => {
                saw_digit = true;
                after_separator = false;
            }
            Some('_') => {
                // A separator must sit between two digits.
                if after_separator || !saw_digit {
                    return Err(separator_error());
                }
                after_separator = true;
            }
            _ => break,
        }
        self.advance();
    }

    if after_separator {
        return Err(separator_error());
    }
    if require_digit && !saw_digit {
        Err(StatorError::SyntaxError(format!(
            "missing digits in {kind} literal"
        )))
    } else {
        Ok(())
    }
}
/// Consumes decimal digits (`0`–`9`) with `_` separators; see `scan_digit_sequence`.
fn scan_decimal_digits(&mut self, saw_digit: bool, require_digit: bool) -> StatorResult<()> {
    let is_decimal = |c: char| matches!(c, '0'..='9');
    self.scan_digit_sequence(saw_digit, require_digit, is_decimal, "decimal")
}
/// Consumes at least one hexadecimal digit, allowing `_` separators.
fn scan_hex_digits(&mut self) -> StatorResult<()> {
    let is_hex = |c: char| c.is_digit(16);
    self.scan_digit_sequence(false, true, is_hex, "hex")
}
/// Consumes at least one binary digit, allowing `_` separators.
fn scan_binary_digits(&mut self) -> StatorResult<()> {
    let is_binary = |c: char| c == '0' || c == '1';
    self.scan_digit_sequence(false, true, is_binary, "binary")
}
/// Consumes at least one octal digit, allowing `_` separators.
fn scan_octal_digits(&mut self) -> StatorResult<()> {
    let is_octal = |c: char| c.is_digit(8);
    self.scan_digit_sequence(false, true, is_octal, "octal")
}
/// Consumes the part of an escape sequence that follows the backslash
/// (the caller has already consumed the `\`).
///
/// `\uXXXX`, `\u{X…}` and `\xXX` are validated; any other character is
/// consumed as a one-character escape.
///
/// # Errors
///
/// Returns a syntax error when the input ends right after the backslash,
/// when a `\u` / `\x` escape has missing or non-hex digits, when `\u{…}` is
/// not closed by `}`, or when the code point in `\u{…}` exceeds `0x10FFFF`.
fn scan_escape_sequence(&mut self) -> StatorResult<()> {
    let Some(c) = self.peek() else {
        return Err(StatorError::SyntaxError(
            "unterminated escape sequence".into(),
        ));
    };
    self.advance();

    match c {
        'u' if self.peek() == Some('{') => {
            self.advance();
            let mut count = 0usize;
            let mut code_point = 0u32;
            while let Some(digit) = self.peek().and_then(|d| d.to_digit(16)) {
                self.advance();
                count += 1;
                // Saturate so an arbitrarily long digit run cannot wrap back
                // into the valid range.
                code_point = code_point.saturating_mul(16).saturating_add(digit);
            }
            if count == 0 {
                return Err(StatorError::SyntaxError(
                    "invalid Unicode escape sequence".into(),
                ));
            }
            if self.peek() != Some('}') {
                return Err(StatorError::SyntaxError(
                    "expected '}' in Unicode escape sequence".into(),
                ));
            }
            self.advance();
            if code_point > 0x10FFFF {
                return Err(StatorError::SyntaxError(
                    "Unicode code point out of range in escape sequence".into(),
                ));
            }
        }
        'u' => {
            for _ in 0..4 {
                match self.peek() {
                    Some(d) if d.is_ascii_hexdigit() => {
                        self.advance();
                    }
                    _ => {
                        return Err(StatorError::SyntaxError(
                            "invalid Unicode escape sequence".into(),
                        ));
                    }
                }
            }
        }
        'x' => {
            for _ in 0..2 {
                match self.peek() {
                    Some(d) if d.is_ascii_hexdigit() => {
                        self.advance();
                    }
                    _ => {
                        return Err(StatorError::SyntaxError(
                            "invalid hex escape sequence".into(),
                        ));
                    }
                }
            }
        }
        // Single-character escape or line continuation: already consumed.
        _ => {}
    }
    Ok(())
}
fn scan_string(&mut self, quote: char, start: Position) -> StatorResult<Token> {
let raw_start = start.offset;
loop {
match self.peek() {
None | Some('\n') | Some('\r') | Some('\u{2028}') | Some('\u{2029}') => {
return Err(StatorError::SyntaxError(
"unterminated string literal".into(),
));
}
Some(c) if c == quote => {
self.advance();
break;
}
Some('\\') => {
self.advance(); self.scan_escape_sequence()?;
}
_ => {
self.advance();
}
}
}
let raw = self.source[raw_start..self.pos].to_string();
let end = self.current_pos();
Ok(Token {
kind: TokenKind::StringLiteral,
value: TokenValue::Str(raw),
span: Span { start, end },
had_line_terminator_before: false, })
}
/// Scans template characters from the current position up to the closing
/// backtick or the next `${`.
///
/// Returns the raw text before the delimiter and a flag that is `true` when
/// the body ended at `${` (a substitution follows) and `false` when it ended
/// at the closing backtick. The delimiter itself is consumed.
///
/// # Errors
///
/// Returns a syntax error when the input ends before either delimiter.
fn scan_template_body(&mut self) -> StatorResult<(String, bool)> {
    let body_start = self.pos;
    while let Some(c) = self.peek() {
        let at_substitution = c == '$' && self.peek2() == Some('{');
        if c == '`' || at_substitution {
            let raw = self.source[body_start..self.pos].to_string();
            self.advance();
            if at_substitution {
                // Also consume the `{` of `${`.
                self.advance();
            }
            return Ok((raw, at_substitution));
        }

        self.advance();
        if c == '\\' {
            // Skip the escaped character so an escaped backtick or `$` does
            // not end the body.
            if let Some(escaped) = self.peek() {
                self.advance();
                if escaped == '\r' && self.peek() == Some('\n') {
                    self.advance();
                }
            }
        }
    }
    Err(StatorError::SyntaxError(
        "unterminated template literal".into(),
    ))
}
fn scan_regexp(&mut self, start: Position) -> StatorResult<Token> {
let raw_start = start.offset;
let mut in_class = false;
loop {
match self.peek() {
None | Some('\n') | Some('\r') | Some('\u{2028}') | Some('\u{2029}') => {
return Err(StatorError::SyntaxError(
"unterminated regular expression literal".into(),
));
}
Some('[') => {
in_class = true;
self.advance();
}
Some(']') => {
in_class = false;
self.advance();
}
Some('/') if !in_class => {
self.advance(); break;
}
Some('\\') => {
self.advance(); match self.peek() {
None | Some('\n') | Some('\r') | Some('\u{2028}') | Some('\u{2029}') => {
return Err(StatorError::SyntaxError(
"unterminated regular expression literal".into(),
));
}
_ => {
self.advance();
}
}
}
_ => {
self.advance();
}
}
}
while matches!(self.peek(), Some(c) if c.is_ascii_alphabetic()) {
self.advance();
}
let raw = self.source[raw_start..self.pos].to_string();
let end = self.current_pos();
Ok(Token {
kind: TokenKind::RegExpLiteral,
value: TokenValue::Str(raw),
span: Span { start, end },
had_line_terminator_before: false, })
}
fn scan_numeric(&mut self, first: char, start: Position) -> StatorResult<Token> {
let num_start = start.offset;
if first == '0' {
match self.peek() {
Some('_') => {
return Err(StatorError::SyntaxError(
"invalid numeric separator in decimal literal".into(),
));
}
Some('x') | Some('X') => {
self.advance();
self.scan_hex_digits()?;
if self.peek() == Some('n') {
self.advance();
}
}
Some('o') | Some('O') => {
self.advance();
self.scan_octal_digits()?;
if self.peek() == Some('n') {
self.advance();
}
}
Some('b') | Some('B') => {
self.advance();
self.scan_binary_digits()?;
if self.peek() == Some('n') {
self.advance();
}
}
Some(c) if c.is_ascii_digit() => {
self.scan_decimal_digits(true, false)?;
if matches!(self.peek(), Some('.')) {
self.advance();
self.scan_decimal_digits(false, false)?;
self.scan_exponent()?;
} else {
self.scan_exponent()?;
}
}
Some('.') => {
self.advance();
self.scan_decimal_digits(false, false)?;
self.scan_exponent()?;
}
Some('e') | Some('E') => {
self.scan_exponent()?;
}
Some('n') => {
self.advance(); }
_ => {} }
} else if first == '.' {
self.scan_decimal_digits(false, true)?;
self.scan_exponent()?;
} else {
self.scan_decimal_digits(true, false)?;
if self.peek() == Some('.') {
self.advance();
self.scan_decimal_digits(false, false)?;
self.scan_exponent()?;
} else if matches!(self.peek(), Some('e') | Some('E')) {
self.scan_exponent()?;
} else if self.peek() == Some('n') {
self.advance(); }
}
let raw = &self.source[num_start..self.pos];
let is_bigint = raw.ends_with('n');
let token_value = if is_bigint {
let numeric = raw[..raw.len() - 1].replace('_', "");
TokenValue::BigInt(numeric)
} else {
TokenValue::Number(parse_numeric_raw(raw))
};
let end = self.current_pos();
Ok(Token {
kind: TokenKind::NumericLiteral,
value: token_value,
span: Span { start, end },
had_line_terminator_before: false, })
}
/// Consumes an optional exponent part (`e` / `E`, optional sign, digits).
/// Does nothing when the next character is not `e` or `E`.
///
/// # Errors
///
/// Returns a syntax error when the exponent marker is not followed by at
/// least one digit, or when the digits contain a misplaced separator.
fn scan_exponent(&mut self) -> StatorResult<()> {
    if !matches!(self.peek(), Some('e' | 'E')) {
        return Ok(());
    }
    self.advance();
    if let Some('+' | '-') = self.peek() {
        self.advance();
    }
    self.scan_decimal_digits(false, true)
}
/// Scans the remainder of an identifier or keyword whose first character
/// `first` (located at `start`) has already been consumed. `first` is `\`
/// when the name begins with a Unicode escape.
///
/// Names written without escapes are looked up in `keyword_kind`; a name that
/// contains an escape is always an `Identifier` and carries the decoded text.
/// Keyword tokens carry no value. `had_line_terminator_before` is left
/// `false` for the caller to fill in.
fn scan_identifier(&mut self, first: char, start: Position) -> Token {
    let began_with_escape = first == '\\';
    if began_with_escape {
        self.scan_unicode_escape_in_id_rest();
    }

    while let Some(c) = self.peek() {
        if is_id_continue(c) {
            self.advance();
        } else if c == '\\' && self.peek2() == Some('u') {
            self.advance();
            self.scan_unicode_escape_in_id_rest();
        } else {
            break;
        }
    }

    let raw = &self.source[start.offset..self.pos];
    let (kind, value) = if began_with_escape || raw.contains('\\') {
        // Escaped spellings never count as keywords.
        let decoded = decode_unicode_escapes(raw);
        (TokenKind::Identifier, TokenValue::Str(decoded))
    } else {
        match keyword_kind(raw) {
            Some(keyword) => (keyword, TokenValue::None),
            None => (TokenKind::Identifier, TokenValue::Str(raw.to_string())),
        }
    };

    Token {
        kind,
        value,
        span: Span {
            start,
            end: self.current_pos(),
        },
        had_line_terminator_before: false,
    }
}
/// Consumes the `u…` part of a Unicode escape inside an identifier (the
/// backslash has already been consumed).
///
/// Accepts `u{X…}` and `uXXXX`. Scanning is lenient: it stops silently at the
/// first unexpected character and never reports an error. Does nothing when
/// the next character is not `u`.
fn scan_unicode_escape_in_id_rest(&mut self) {
    if self.peek() != Some('u') {
        return;
    }
    self.advance();

    let is_hex = |c: Option<char>| c.is_some_and(|c| c.is_ascii_hexdigit());

    if self.peek() == Some('{') {
        self.advance();
        while is_hex(self.peek()) {
            self.advance();
        }
        if self.peek() == Some('}') {
            self.advance();
        }
        return;
    }

    // Fixed form: up to four hex digits.
    let mut remaining = 4;
    while remaining > 0 && is_hex(self.peek()) {
        self.advance();
        remaining -= 1;
    }
}
pub fn next_token(&mut self) -> StatorResult<Token> {
let had_lt = self.skip_whitespace();
if self.is_eof() {
return Ok(Token {
kind: TokenKind::Eof,
value: TokenValue::None,
span: Span {
start: self.current_pos(),
end: self.current_pos(),
},
had_line_terminator_before: had_lt,
});
}
if self.pos == 0 && self.peek() == Some('#') && self.peek2() == Some('!') {
let start = self.current_pos();
self.advance(); self.advance(); while let Some(c) = self.peek() {
if is_line_terminator(c) {
break;
}
self.advance();
}
let text = self.source[..self.pos].to_string();
let end = self.current_pos();
return Ok(Token {
kind: TokenKind::HashbangComment,
value: TokenValue::Str(text),
span: Span { start, end },
had_line_terminator_before: had_lt,
});
}
let start = self.current_pos();
let c = self.advance();
let tok = match c {
'/' => {
match self.peek() {
Some('/') => {
self.advance(); let text_start = self.pos;
while let Some(ch) = self.peek() {
if is_line_terminator(ch) {
break;
}
self.advance();
}
let text = self.source[text_start..self.pos].to_string();
let end = self.current_pos();
return Ok(Token {
kind: TokenKind::SingleLineComment,
value: TokenValue::Str(text),
span: Span { start, end },
had_line_terminator_before: had_lt,
});
}
Some('*') => {
self.advance(); let text_start = self.pos;
let mut inner_lt = false;
loop {
match self.peek() {
None => {
return Err(StatorError::SyntaxError(
"unterminated block comment".into(),
));
}
Some('*') if self.peek2() == Some('/') => {
let text = self.source[text_start..self.pos].to_string();
self.advance(); self.advance(); let end = self.current_pos();
return Ok(Token {
kind: TokenKind::MultiLineComment,
value: TokenValue::Str(text),
span: Span { start, end },
had_line_terminator_before: had_lt || inner_lt,
});
}
Some(ch) => {
if is_line_terminator(ch) {
inner_lt = true;
}
self.advance();
}
}
}
}
Some('=') if !slash_is_regexp(self.last_significant_kind.as_ref()) => {
self.advance();
Token {
kind: TokenKind::SlashEqual,
value: TokenValue::None,
span: Span {
start,
end: self.current_pos(),
},
had_line_terminator_before: had_lt,
}
}
_ => {
if slash_is_regexp(self.last_significant_kind.as_ref()) {
let mut tok = self.scan_regexp(start)?;
tok.had_line_terminator_before = had_lt;
self.last_significant_kind = Some(TokenKind::RegExpLiteral);
return Ok(tok);
}
Token {
kind: TokenKind::Slash,
value: TokenValue::None,
span: Span {
start,
end: self.current_pos(),
},
had_line_terminator_before: had_lt,
}
}
}
}
'"' | '\'' => {
let mut tok = self.scan_string(c, start)?;
tok.had_line_terminator_before = had_lt;
self.last_significant_kind = Some(TokenKind::StringLiteral);
return Ok(tok);
}
'`' => {
let (raw, has_sub) = self.scan_template_body()?;
let end = self.current_pos();
let kind = if has_sub {
self.template_stack.push(self.brace_depth);
TokenKind::TemplateHead
} else {
TokenKind::NoSubstitutionTemplate
};
self.last_significant_kind = Some(kind);
return Ok(Token {
kind,
value: TokenValue::Str(raw),
span: Span { start, end },
had_line_terminator_before: had_lt,
});
}
c if c.is_ascii_digit() => {
let mut tok = self.scan_numeric(c, start)?;
tok.had_line_terminator_before = had_lt;
self.last_significant_kind = Some(TokenKind::NumericLiteral);
return Ok(tok);
}
'.' => {
if matches!(self.peek(), Some(c) if c.is_ascii_digit()) {
let mut tok = self.scan_numeric('.', start)?;
tok.had_line_terminator_before = had_lt;
self.last_significant_kind = Some(TokenKind::NumericLiteral);
return Ok(tok);
} else if self.peek() == Some('.') && self.peek2() == Some('.') {
self.advance();
self.advance();
Token {
kind: TokenKind::DotDotDot,
value: TokenValue::None,
span: Span {
start,
end: self.current_pos(),
},
had_line_terminator_before: had_lt,
}
} else {
Token {
kind: TokenKind::Dot,
value: TokenValue::None,
span: Span {
start,
end: self.current_pos(),
},
had_line_terminator_before: had_lt,
}
}
}
'#' => {
if !matches!(self.peek(), Some(c) if is_id_start(c) || c == '\\') {
return Err(StatorError::SyntaxError(format!(
"invalid or unexpected token '#' at {}:{}",
start.line, start.column
)));
}
let name_start = self.pos;
loop {
match self.peek() {
Some(nc) if is_id_continue(nc) => {
self.advance();
}
Some('\\') if self.peek2() == Some('u') => {
self.advance();
self.scan_unicode_escape_in_id_rest();
}
_ => break,
}
}
let name = self.source[name_start..self.pos].to_string();
let end = self.current_pos();
Token {
kind: TokenKind::PrivateIdentifier,
value: TokenValue::Str(name),
span: Span { start, end },
had_line_terminator_before: had_lt,
}
}
c if is_id_start(c) => {
let mut tok = self.scan_identifier(c, start);
tok.had_line_terminator_before = had_lt;
self.last_significant_kind = Some(tok.kind);
return Ok(tok);
}
'\\' if self.peek() == Some('u') => {
let mut tok = self.scan_identifier('\\', start);
tok.had_line_terminator_before = had_lt;
self.last_significant_kind = Some(tok.kind);
return Ok(tok);
}
'}' => {
if let Some(&depth) = self.template_stack.last()
&& self.brace_depth == depth
{
self.template_stack.pop();
let (raw, has_sub) = self.scan_template_body()?;
let end = self.current_pos();
let kind = if has_sub {
self.template_stack.push(self.brace_depth);
TokenKind::TemplateMiddle
} else {
TokenKind::TemplateTail
};
self.last_significant_kind = Some(kind);
return Ok(Token {
kind,
value: TokenValue::Str(raw),
span: Span { start, end },
had_line_terminator_before: had_lt,
});
}
self.brace_depth = self.brace_depth.saturating_sub(1);
Token {
kind: TokenKind::RightBrace,
value: TokenValue::None,
span: Span {
start,
end: self.current_pos(),
},
had_line_terminator_before: had_lt,
}
}
'{' => {
self.brace_depth += 1;
Token {
kind: TokenKind::LeftBrace,
value: TokenValue::None,
span: Span {
start,
end: self.current_pos(),
},
had_line_terminator_before: had_lt,
}
}
'(' => Token {
kind: TokenKind::LeftParen,
value: TokenValue::None,
span: Span {
start,
end: self.current_pos(),
},
had_line_terminator_before: had_lt,
},
')' => Token {
kind: TokenKind::RightParen,
value: TokenValue::None,
span: Span {
start,
end: self.current_pos(),
},
had_line_terminator_before: had_lt,
},
'[' => Token {
kind: TokenKind::LeftBracket,
value: TokenValue::None,
span: Span {
start,
end: self.current_pos(),
},
had_line_terminator_before: had_lt,
},
']' => Token {
kind: TokenKind::RightBracket,
value: TokenValue::None,
span: Span {
start,
end: self.current_pos(),
},
had_line_terminator_before: had_lt,
},
';' => Token {
kind: TokenKind::Semicolon,
value: TokenValue::None,
span: Span {
start,
end: self.current_pos(),
},
had_line_terminator_before: had_lt,
},
',' => Token {
kind: TokenKind::Comma,
value: TokenValue::None,
span: Span {
start,
end: self.current_pos(),
},
had_line_terminator_before: had_lt,
},
'~' => Token {
kind: TokenKind::Tilde,
value: TokenValue::None,
span: Span {
start,
end: self.current_pos(),
},
had_line_terminator_before: had_lt,
},
':' => Token {
kind: TokenKind::Colon,
value: TokenValue::None,
span: Span {
start,
end: self.current_pos(),
},
had_line_terminator_before: had_lt,
},
'<' => {
let kind = if self.peek() == Some('<') {
self.advance();
if self.peek() == Some('=') {
self.advance();
TokenKind::LessLessEqual
} else {
TokenKind::LessLess
}
} else if self.peek() == Some('=') {
self.advance();
TokenKind::LessEqual
} else {
TokenKind::Less
};
Token {
kind,
value: TokenValue::None,
span: Span {
start,
end: self.current_pos(),
},
had_line_terminator_before: had_lt,
}
}
'>' => {
let kind = if self.peek() == Some('>') {
self.advance();
if self.peek() == Some('>') {
self.advance();
if self.peek() == Some('=') {
self.advance();
TokenKind::GreaterGreaterGreaterEqual
} else {
TokenKind::GreaterGreaterGreater
}
} else if self.peek() == Some('=') {
self.advance();
TokenKind::GreaterGreaterEqual
} else {
TokenKind::GreaterGreater
}
} else if self.peek() == Some('=') {
self.advance();
TokenKind::GreaterEqual
} else {
TokenKind::Greater
};
Token {
kind,
value: TokenValue::None,
span: Span {
start,
end: self.current_pos(),
},
had_line_terminator_before: had_lt,
}
}
'=' => {
let kind = if self.peek() == Some('=') {
self.advance();
if self.peek() == Some('=') {
self.advance();
TokenKind::EqualEqualEqual
} else {
TokenKind::EqualEqual
}
} else if self.peek() == Some('>') {
self.advance();
TokenKind::Arrow
} else {
TokenKind::Equal
};
Token {
kind,
value: TokenValue::None,
span: Span {
start,
end: self.current_pos(),
},
had_line_terminator_before: had_lt,
}
}
'!' => {
let kind = if self.peek() == Some('=') {
self.advance();
if self.peek() == Some('=') {
self.advance();
TokenKind::BangEqualEqual
} else {
TokenKind::BangEqual
}
} else {
TokenKind::Bang
};
Token {
kind,
value: TokenValue::None,
span: Span {
start,
end: self.current_pos(),
},
had_line_terminator_before: had_lt,
}
}
'+' => {
let kind = if self.peek() == Some('+') {
self.advance();
TokenKind::PlusPlus
} else if self.peek() == Some('=') {
self.advance();
TokenKind::PlusEqual
} else {
TokenKind::Plus
};
Token {
kind,
value: TokenValue::None,
span: Span {
start,
end: self.current_pos(),
},
had_line_terminator_before: had_lt,
}
}
'-' => {
let kind = if self.peek() == Some('-') {
self.advance();
TokenKind::MinusMinus
} else if self.peek() == Some('=') {
self.advance();
TokenKind::MinusEqual
} else {
TokenKind::Minus
};
Token {
kind,
value: TokenValue::None,
span: Span {
start,
end: self.current_pos(),
},
had_line_terminator_before: had_lt,
}
}
'*' => {
let kind = if self.peek() == Some('*') {
self.advance();
if self.peek() == Some('=') {
self.advance();
TokenKind::StarStarEqual
} else {
TokenKind::StarStar
}
} else if self.peek() == Some('=') {
self.advance();
TokenKind::StarEqual
} else {
TokenKind::Star
};
Token {
kind,
value: TokenValue::None,
span: Span {
start,
end: self.current_pos(),
},
had_line_terminator_before: had_lt,
}
}
'%' => {
let kind = if self.peek() == Some('=') {
self.advance();
TokenKind::PercentEqual
} else {
TokenKind::Percent
};
Token {
kind,
value: TokenValue::None,
span: Span {
start,
end: self.current_pos(),
},
had_line_terminator_before: had_lt,
}
}
'&' => {
let kind = if self.peek() == Some('&') {
self.advance();
if self.peek() == Some('=') {
self.advance();
TokenKind::AmpersandAmpersandEqual
} else {
TokenKind::AmpersandAmpersand
}
} else if self.peek() == Some('=') {
self.advance();
TokenKind::AmpersandEqual
} else {
TokenKind::Ampersand
};
Token {
kind,
value: TokenValue::None,
span: Span {
start,
end: self.current_pos(),
},
had_line_terminator_before: had_lt,
}
}
'|' => {
let kind = if self.peek() == Some('|') {
self.advance();
if self.peek() == Some('=') {
self.advance();
TokenKind::PipePipeEqual
} else {
TokenKind::PipePipe
}
} else if self.peek() == Some('=') {
self.advance();
TokenKind::PipeEqual
} else {
TokenKind::Pipe
};
Token {
kind,
value: TokenValue::None,
span: Span {
start,
end: self.current_pos(),
},
had_line_terminator_before: had_lt,
}
}
'^' => {
let kind = if self.peek() == Some('=') {
self.advance();
TokenKind::CaretEqual
} else {
TokenKind::Caret
};
Token {
kind,
value: TokenValue::None,
span: Span {
start,
end: self.current_pos(),
},
had_line_terminator_before: had_lt,
}
}
'?' => {
let kind = if self.peek() == Some('?') {
self.advance();
if self.peek() == Some('=') {
self.advance();
TokenKind::QuestionQuestionEqual
} else {
TokenKind::QuestionQuestion
}
} else if self.peek() == Some('.')
&& !matches!(self.peek2(), Some(d) if d.is_ascii_digit())
{
self.advance();
TokenKind::QuestionDot
} else {
TokenKind::Question
};
Token {
kind,
value: TokenValue::None,
span: Span {
start,
end: self.current_pos(),
},
had_line_terminator_before: had_lt,
}
}
_ => {
return Err(StatorError::SyntaxError(format!(
"unexpected character {:?} at line {}, column {}",
c, start.line, start.column
)));
}
};
self.last_significant_kind = Some(tok.kind);
Ok(tok)
}
pub fn tokenize_all(source: &'src str) -> StatorResult<Vec<Token>> {
let mut scanner = Scanner::new(source);
let mut tokens = Vec::new();
loop {
let tok = scanner.next_token()?;
if tok.kind == TokenKind::Eof {
break;
}
tokens.push(tok);
}
Ok(tokens)
}
}
/// Consumes up to `n` leading ASCII hex digits from `chars` and returns them.
///
/// Stops early, leaving the iterator untouched at that point, when a
/// non-hex character or the end of input is reached; the result may
/// therefore be shorter than `n`.
fn take_hex(chars: &mut std::iter::Peekable<std::str::Chars<'_>>, n: usize) -> String {
    let mut hex = String::with_capacity(n);
    // Every pushed char is ASCII, so the byte length equals the digit count.
    while hex.len() < n {
        let Some(digit) = chars.next_if(|d| d.is_ascii_hexdigit()) else {
            break;
        };
        hex.push(digit);
    }
    hex
}
/// Replaces `\uXXXX` and `\u{X…}` escapes in `raw` with the characters they
/// denote and returns the result.
///
/// Decoding is lenient: an escape whose digits do not form a valid Unicode
/// scalar value is dropped from the output, and a backslash not followed by
/// `u` is copied through unchanged.
pub(crate) fn decode_unicode_escapes(raw: &str) -> String {
    if !raw.contains('\\') {
        return raw.to_owned();
    }

    let mut decoded = String::with_capacity(raw.len());
    let mut chars = raw.chars().peekable();
    while let Some(c) = chars.next() {
        let is_escape = c == '\\' && chars.next_if_eq(&'u').is_some();
        if !is_escape {
            decoded.push(c);
            continue;
        }

        let digits = if chars.next_if_eq(&'{').is_some() {
            // Everything up to (and consuming) the closing brace.
            chars.by_ref().take_while(|&d| d != '}').collect::<String>()
        } else {
            take_hex(&mut chars, 4)
        };
        // Pushes the decoded character, or nothing when the escape is invalid.
        decoded.extend(
            u32::from_str_radix(&digits, 16)
                .ok()
                .and_then(char::from_u32),
        );
    }
    decoded
}
/// Emits a buffered code unit that never found its partner, clearing the
/// buffer. The unit is decoded on its own, so an unpaired surrogate becomes
/// U+FFFD. Does nothing when the buffer is empty.
fn flush_pending_template_surrogate(out: &mut String, pending_high_surrogate: &mut Option<u16>) {
    if let Some(high) = pending_high_surrogate.take() {
        out.extend(
            char::decode_utf16([high]).map(|unit| unit.unwrap_or(char::REPLACEMENT_CHARACTER)),
        );
    }
}
/// Appends one UTF-16 code unit to `out`, joining surrogate pairs.
///
/// * A high surrogate is buffered in `pending_high_surrogate` (flushing any
///   earlier one first).
/// * A low surrogate is combined with a buffered high surrogate into one
///   character; without a partner it is decoded lossily on its own.
/// * Any other unit flushes the buffer and is appended as a character.
fn push_template_code_unit(out: &mut String, pending_high_surrogate: &mut Option<u16>, unit: u16) {
    let is_high = (0xD800..=0xDBFF).contains(&unit);
    let is_low = (0xDC00..=0xDFFF).contains(&unit);

    if is_high {
        flush_pending_template_surrogate(out, pending_high_surrogate);
        *pending_high_surrogate = Some(unit);
        return;
    }

    if is_low {
        // Decode `[high, low]` when a high surrogate is waiting, else `[low]`.
        let units: Vec<u16> = pending_high_surrogate
            .take()
            .into_iter()
            .chain([unit])
            .collect();
        out.push_str(&String::from_utf16_lossy(&units));
        return;
    }

    flush_pending_template_surrogate(out, pending_high_surrogate);
    if let Some(ch) = char::from_u32(unit.into()) {
        out.push(ch);
    }
}
/// Appends `ch` to `out`, first flushing any buffered unpaired high surrogate.
fn push_template_char(out: &mut String, pending_high_surrogate: &mut Option<u16>, ch: char) {
    flush_pending_template_surrogate(out, pending_high_surrogate);
    let mut utf8 = [0u8; 4];
    out.push_str(ch.encode_utf8(&mut utf8));
}
/// Computes the cooked value of a raw template chunk.
///
/// * CR and CRLF in the raw text are normalised to LF.
/// * The usual escapes (`\n`, `\t`, `\xXX`, `\uXXXX`, `\u{X…}`, `\0`, …) are
///   decoded; adjacent `\u` surrogate halves are combined, and an unpaired
///   half becomes U+FFFD because a Rust `String` cannot hold it.
/// * A backslash followed by a line terminator is a line continuation and
///   contributes nothing.
/// * A backslash followed by any other non-digit character yields that
///   character itself (`\/` → `/`, `\a` → `a`, `\{` → `{`).
///
/// Returns `None` when the chunk contains an invalid escape: a trailing
/// backslash, `\1`–`\9`, `\0` followed by a digit, an incomplete `\x` / `\u`
/// escape, or a `\u{…}` code point above `0x10FFFF`.
pub(crate) fn cook_template_raw(raw: &str) -> Option<String> {
    let mut out = String::with_capacity(raw.len());
    let mut chars = raw.chars().peekable();
    let mut pending_high_surrogate = None;

    while let Some(c) = chars.next() {
        if c != '\\' {
            if c == '\r' {
                // Raw CR / CRLF cooks to a single LF.
                push_template_char(&mut out, &mut pending_high_surrogate, '\n');
                if chars.peek() == Some(&'\n') {
                    chars.next();
                }
            } else {
                push_template_char(&mut out, &mut pending_high_surrogate, c);
            }
            continue;
        }

        // A lone trailing backslash is not a valid escape.
        let escaped = chars.next()?;
        let cooked = match escaped {
            'n' => '\n',
            'r' => '\r',
            't' => '\t',
            'b' => '\u{0008}',
            'f' => '\u{000C}',
            'v' => '\u{000B}',
            '0' if !matches!(chars.peek(), Some('0'..='9')) => '\0',
            // `\0` followed by a digit, and `\1`..`\9`, are not valid escapes.
            '0'..='9' => return None,
            'x' => {
                let h = take_hex(&mut chars, 2);
                if h.len() != 2 {
                    return None;
                }
                char::from_u32(u32::from_str_radix(&h, 16).ok()?)?
            }
            'u' => {
                let code_point = if chars.peek() == Some(&'{') {
                    chars.next();
                    let mut hex = String::new();
                    loop {
                        match chars.next() {
                            Some('}') => break,
                            Some(d) if d.is_ascii_hexdigit() => hex.push(d),
                            _ => return None,
                        }
                    }
                    if hex.is_empty() {
                        return None;
                    }
                    // Overflow means the value is far above 0x10FFFF.
                    u32::from_str_radix(&hex, 16).ok()?
                } else {
                    let h = take_hex(&mut chars, 4);
                    if h.len() != 4 {
                        return None;
                    }
                    u32::from_str_radix(&h, 16).ok()?
                };
                // Values that fit one UTF-16 code unit go through the
                // surrogate-aware path, so `\u{D83D}\u{DE00}` and
                // `\uD83D\uDE00` cook identically.
                if let Ok(unit) = u16::try_from(code_point) {
                    push_template_code_unit(&mut out, &mut pending_high_surrogate, unit);
                    continue;
                }
                // `from_u32` rejects anything above 0x10FFFF.
                char::from_u32(code_point)?
            }
            // Line continuations contribute nothing to the cooked value.
            '\n' | '\u{2028}' | '\u{2029}' => continue,
            '\r' => {
                if chars.peek() == Some(&'\n') {
                    chars.next();
                }
                continue;
            }
            // Any other character stands for itself; this also covers
            // `\\`, `\'`, `\"`, `` \` `` and `\$`.
            other => other,
        };
        push_template_char(&mut out, &mut pending_high_surrogate, cooked);
    }

    flush_pending_template_surrogate(&mut out, &mut pending_high_surrogate);
    Some(out)
}
/// Converts the raw text of a non-BigInt numeric literal into its `f64` value.
///
/// `_` separators (and any `n`) are stripped first. Supported forms are
/// `0x…`, `0o…`, `0b…`, legacy octal (`0` followed only by digits `0`–`7`)
/// and decimal (parsed with `str::parse::<f64>`). Returns NaN when the text
/// cannot be parsed.
///
/// Radix literals of any length are supported: values too large for an
/// integer type are rounded to the nearest `f64` (ties to even), and
/// overflow to infinity, rather than turning into NaN.
fn parse_numeric_raw(raw: &str) -> f64 {
    /// Parses `digits` in radix `2^bits_per_digit`, correctly rounded.
    fn parse_pow2_radix(digits: &str, bits_per_digit: u32) -> f64 {
        let radix = 1u32 << bits_per_digit;
        // Fast path; also keeps the historical result for every input that
        // already parsed successfully.
        if let Ok(small) = i64::from_str_radix(digits, radix) {
            return small as f64;
        }
        if digits.is_empty() {
            return f64::NAN;
        }

        // Keep the most significant ~128 bits exactly, count the digits that
        // no longer fit, and remember whether any of them was non-zero.
        let mut prefix: u128 = 0;
        let mut dropped_digits: u32 = 0;
        let mut sticky = false;
        for c in digits.chars() {
            let Some(digit) = c.to_digit(radix) else {
                return f64::NAN;
            };
            if dropped_digits == 0 && prefix >> (128 - bits_per_digit) == 0 {
                prefix = (prefix << bits_per_digit) | u128::from(digit);
            } else {
                dropped_digits = dropped_digits.saturating_add(1);
                sticky |= digit != 0;
            }
        }
        // The lowest kept bit lies far below the 53-bit rounding position,
        // so setting it only serves to break ties the right way.
        if sticky {
            prefix |= 1;
        }
        let shift = i32::try_from(u64::from(dropped_digits) * u64::from(bits_per_digit))
            .unwrap_or(i32::MAX);
        // Scaling by a power of two is exact (or overflows to infinity).
        (prefix as f64) * 2f64.powi(shift)
    }

    let clean: String = raw.chars().filter(|&c| c != '_' && c != 'n').collect();

    let prefixed = |lower: &str, upper: &str| {
        clean
            .strip_prefix(lower)
            .or_else(|| clean.strip_prefix(upper))
    };

    if let Some(digits) = prefixed("0x", "0X") {
        parse_pow2_radix(digits, 4)
    } else if let Some(digits) = prefixed("0o", "0O") {
        parse_pow2_radix(digits, 3)
    } else if let Some(digits) = prefixed("0b", "0B") {
        parse_pow2_radix(digits, 1)
    } else if clean.len() >= 2
        && clean.starts_with('0')
        && clean[1..].chars().all(|c| matches!(c, '0'..='7'))
    {
        // Legacy octal such as `017`; `08` / `09` fall through to decimal.
        parse_pow2_radix(&clean[1..], 3)
    } else {
        clean.parse::<f64>().unwrap_or(f64::NAN)
    }
}
#[cfg(test)]
mod tests {
use super::*;
// Tokenizes `src` and returns the kinds of all non-comment tokens.
// Panics when scanning fails.
fn kinds(src: &str) -> Vec<TokenKind> {
    let is_comment = |kind: TokenKind| {
        matches!(
            kind,
            TokenKind::SingleLineComment
                | TokenKind::MultiLineComment
                | TokenKind::HashbangComment
        )
    };
    Scanner::tokenize_all(src)
        .unwrap()
        .into_iter()
        .map(|t| t.kind)
        .filter(|&kind| !is_comment(kind))
        .collect()
}
// Tokenizes `src` and returns every token (comments included).
// Panics when scanning fails.
fn tokens(src: &str) -> Vec<Token> {
    let scanned = Scanner::tokenize_all(src);
    scanned.unwrap()
}
// Every reserved word lexes to its dedicated keyword kind, in source order.
#[test]
fn test_keywords_reserved() {
    let src = "break case catch class const continue debugger default \
               delete do else enum export extends false finally for \
               function if import in instanceof let new null of return \
               static super switch this throw true try typeof var void \
               while with yield await";
    let expected = [
        TokenKind::Break,
        TokenKind::Case,
        TokenKind::Catch,
        TokenKind::Class,
        TokenKind::Const,
        TokenKind::Continue,
        TokenKind::Debugger,
        TokenKind::Default,
        TokenKind::Delete,
        TokenKind::Do,
        TokenKind::Else,
        TokenKind::Enum,
        TokenKind::Export,
        TokenKind::Extends,
        TokenKind::False,
        TokenKind::Finally,
        TokenKind::For,
        TokenKind::Function,
        TokenKind::If,
        TokenKind::Import,
        TokenKind::In,
        TokenKind::Instanceof,
        TokenKind::Let,
        TokenKind::New,
        TokenKind::Null,
        TokenKind::Of,
        TokenKind::Return,
        TokenKind::Static,
        TokenKind::Super,
        TokenKind::Switch,
        TokenKind::This,
        TokenKind::Throw,
        TokenKind::True,
        TokenKind::Try,
        TokenKind::Typeof,
        TokenKind::Var,
        TokenKind::Void,
        TokenKind::While,
        TokenKind::With,
        TokenKind::Yield,
        TokenKind::Await,
    ];
    assert_eq!(kinds(src), expected);
}
#[test]
fn test_keywords_contextual() {
let toks = kinds("async from as get set target meta");
assert_eq!(
toks,
vec![
TokenKind::Async,
TokenKind::From,
TokenKind::As,
TokenKind::Get,
TokenKind::Set,
TokenKind::Target,
TokenKind::Meta,
]
);
}
#[test]
fn test_identifier_simple() {
let toks = tokens("foo _bar $baz");
assert_eq!(toks[0].kind, TokenKind::Identifier);
assert_eq!(toks[0].value, TokenValue::Str("foo".into()));
assert_eq!(toks[1].kind, TokenKind::Identifier);
assert_eq!(toks[2].kind, TokenKind::Identifier);
}
#[test]
fn test_identifier_unicode() {
let toks = tokens("café");
assert_eq!(toks[0].kind, TokenKind::Identifier);
assert_eq!(toks[0].value, TokenValue::Str("café".into()));
}
#[test]
fn test_private_identifier() {
let toks = tokens("#foo #_bar");
assert_eq!(toks[0].kind, TokenKind::PrivateIdentifier);
assert_eq!(toks[0].value, TokenValue::Str("foo".into()));
assert_eq!(toks[1].kind, TokenKind::PrivateIdentifier);
assert_eq!(toks[1].value, TokenValue::Str("_bar".into()));
}
#[test]
fn test_numeric_decimal_integers() {
let toks = tokens("0 42 100");
for t in &toks {
assert_eq!(t.kind, TokenKind::NumericLiteral);
}
assert_eq!(toks[0].value, TokenValue::Number(0.0));
assert_eq!(toks[1].value, TokenValue::Number(42.0));
assert_eq!(toks[2].value, TokenValue::Number(100.0));
}
#[test]
fn test_numeric_floats() {
let toks = tokens("1.5 .5 1e3 1.5e-2 0.0");
assert_eq!(toks[0].value, TokenValue::Number(1.5));
assert_eq!(toks[1].value, TokenValue::Number(0.5));
assert_eq!(toks[2].value, TokenValue::Number(1000.0));
assert_eq!(toks[3].value, TokenValue::Number(0.015));
assert_eq!(toks[4].value, TokenValue::Number(0.0));
}
#[test]
fn test_numeric_hex() {
let toks = tokens("0xFF 0x1A 0X0a");
assert_eq!(toks[0].value, TokenValue::Number(255.0));
assert_eq!(toks[1].value, TokenValue::Number(26.0));
assert_eq!(toks[2].value, TokenValue::Number(10.0));
}
#[test]
fn test_numeric_binary() {
let toks = tokens("0b1010 0B1111");
assert_eq!(toks[0].value, TokenValue::Number(10.0));
assert_eq!(toks[1].value, TokenValue::Number(15.0));
}
#[test]
fn test_numeric_octal() {
let toks = tokens("0o17 0O7");
assert_eq!(toks[0].value, TokenValue::Number(15.0));
assert_eq!(toks[1].value, TokenValue::Number(7.0));
}
#[test]
fn test_numeric_bigint() {
let toks = tokens("123n 0n 0b1n");
for t in &toks {
assert_eq!(t.kind, TokenKind::NumericLiteral);
}
}
#[test]
fn test_numeric_separator() {
let toks = tokens("1_000_000 0xFF_FF 0o7_7 0b10_10 1_2n");
assert_eq!(toks[0].value, TokenValue::Number(1_000_000.0));
assert_eq!(toks[1].value, TokenValue::Number(0xFFFF as f64));
assert_eq!(toks[2].value, TokenValue::Number(0o77 as f64));
assert_eq!(toks[3].value, TokenValue::Number(0b1010 as f64));
assert_eq!(toks[4].value, TokenValue::BigInt("12".into()));
}
#[test]
fn test_numeric_separator_invalid() {
for src in ["1__0", "1_", "0_1", "0x_FF", "0o7_", "0b10__10", "1e_3"] {
assert!(
Scanner::tokenize_all(src).is_err(),
"{src} should be rejected"
);
}
}
#[test]
fn test_numeric_prefixed_requires_digits() {
for src in ["0x", "0o", "0b"] {
assert!(
Scanner::tokenize_all(src).is_err(),
"{src} should be rejected"
);
}
}
#[test]
fn test_numeric_legacy_octal() {
let toks = tokens("0777");
assert_eq!(toks[0].value, TokenValue::Number(0o777 as f64));
}
#[test]
fn test_string_double_quote() {
let toks = tokens(r#""hello world""#);
assert_eq!(toks[0].kind, TokenKind::StringLiteral);
assert_eq!(toks[0].value, TokenValue::Str(r#""hello world""#.into()));
}
#[test]
fn test_string_single_quote() {
let toks = tokens("'it\\'s'");
assert_eq!(toks[0].kind, TokenKind::StringLiteral);
}
#[test]
fn test_string_escape_sequences() {
let toks = tokens(r#""\n\t\r\\\"" '\u0041' "\x41""#);
assert_eq!(toks[0].kind, TokenKind::StringLiteral);
assert_eq!(toks[1].kind, TokenKind::StringLiteral);
assert_eq!(toks[2].kind, TokenKind::StringLiteral);
}
#[test]
fn test_string_unterminated_error() {
let result = Scanner::tokenize_all(r#""unterminated"#);
assert!(result.is_err());
}
#[test]
fn test_template_no_substitution() {
let toks = tokens("`hello world`");
assert_eq!(toks[0].kind, TokenKind::NoSubstitutionTemplate);
assert_eq!(toks[0].value, TokenValue::Str("hello world".into()));
}
#[test]
fn test_template_with_substitution() {
let toks = tokens("`hello ${name}!`");
assert_eq!(toks[0].kind, TokenKind::TemplateHead);
assert_eq!(toks[0].value, TokenValue::Str("hello ".into()));
assert_eq!(toks[1].kind, TokenKind::Identifier); assert_eq!(toks[2].kind, TokenKind::TemplateTail);
assert_eq!(toks[2].value, TokenValue::Str("!".into()));
}
#[test]
fn test_template_multiple_substitutions() {
let toks = tokens("`${a} and ${b}`");
assert_eq!(toks[0].kind, TokenKind::TemplateHead);
assert_eq!(toks[0].value, TokenValue::Str("".into()));
assert_eq!(toks[1].kind, TokenKind::Identifier); assert_eq!(toks[2].kind, TokenKind::TemplateMiddle);
assert_eq!(toks[2].value, TokenValue::Str(" and ".into()));
assert_eq!(toks[3].kind, TokenKind::Identifier); assert_eq!(toks[4].kind, TokenKind::TemplateTail);
assert_eq!(toks[4].value, TokenValue::Str("".into()));
}
#[test]
fn test_template_nested() {
let toks = tokens("`outer ${`inner`} end`");
assert_eq!(toks[0].kind, TokenKind::TemplateHead);
assert_eq!(toks[0].value, TokenValue::Str("outer ".into()));
assert_eq!(toks[1].kind, TokenKind::NoSubstitutionTemplate);
assert_eq!(toks[1].value, TokenValue::Str("inner".into()));
assert_eq!(toks[2].kind, TokenKind::TemplateTail);
assert_eq!(toks[2].value, TokenValue::Str(" end".into()));
}
#[test]
fn test_template_expression_with_braces() {
let toks = tokens("`a ${{k:1}} b`");
assert_eq!(toks[0].kind, TokenKind::TemplateHead);
assert_eq!(toks[1].kind, TokenKind::LeftBrace);
assert_eq!(toks[2].kind, TokenKind::Identifier);
assert_eq!(toks[3].kind, TokenKind::Colon);
assert_eq!(toks[4].kind, TokenKind::NumericLiteral);
assert_eq!(toks[5].kind, TokenKind::RightBrace);
assert_eq!(toks[6].kind, TokenKind::TemplateTail);
}
#[test]
fn test_regexp_basic() {
let toks = tokens("/foo/gi");
assert_eq!(toks[0].kind, TokenKind::RegExpLiteral);
assert_eq!(toks[0].value, TokenValue::Str("/foo/gi".into()));
}
#[test]
fn test_regexp_with_char_class() {
let toks = tokens("/[a-z]+/");
assert_eq!(toks[0].kind, TokenKind::RegExpLiteral);
}
#[test]
fn test_regexp_escaped_slash() {
let toks = tokens(r#"/foo\/bar/"#);
assert_eq!(toks[0].kind, TokenKind::RegExpLiteral);
assert_eq!(toks[0].value, TokenValue::Str("/foo\\/bar/".into()));
}
#[test]
fn test_regexp_after_return() {
let toks = kinds("return /foo/");
assert_eq!(toks, vec![TokenKind::Return, TokenKind::RegExpLiteral]);
}
#[test]
fn test_division_after_identifier() {
let toks = kinds("x / y");
assert_eq!(
toks,
vec![
TokenKind::Identifier,
TokenKind::Slash,
TokenKind::Identifier
]
);
}
#[test]
fn test_division_after_number() {
let toks = kinds("4 / 2");
assert_eq!(
toks,
vec![
TokenKind::NumericLiteral,
TokenKind::Slash,
TokenKind::NumericLiteral
]
);
}
#[test]
fn test_regexp_after_assignment() {
let toks = kinds("x = /foo/");
assert_eq!(
toks,
vec![
TokenKind::Identifier,
TokenKind::Equal,
TokenKind::RegExpLiteral
]
);
}
#[test]
fn test_regexp_vs_division_after_paren() {
let toks = kinds("(a) / b");
assert_eq!(
toks,
vec![
TokenKind::LeftParen,
TokenKind::Identifier,
TokenKind::RightParen,
TokenKind::Slash,
TokenKind::Identifier
]
);
}
#[test]
fn test_hashbang_comment_at_start() {
let toks = tokens("#!/usr/bin/env node\nfoo");
assert_eq!(toks[0].kind, TokenKind::HashbangComment);
assert_eq!(toks[0].value, TokenValue::Str("#!/usr/bin/env node".into()));
assert_eq!(toks[1].kind, TokenKind::Identifier);
assert!(toks[1].had_line_terminator_before);
}
#[test]
#[ignore] fn test_hashbang_only_file() {
let toks = tokens("#!/usr/bin/env node");
assert_eq!(toks[0].kind, TokenKind::HashbangComment);
assert_eq!(toks[1].kind, TokenKind::Eof);
}
#[test]
fn test_hashbang_filtered_by_kinds() {
let k = kinds("#!/usr/bin/env node\nvar x");
assert_eq!(k, vec![TokenKind::Var, TokenKind::Identifier]);
}
#[test]
fn test_hash_not_at_start_is_error() {
let result = Scanner::tokenize_all(" #!");
assert!(result.is_err());
}
#[test]
fn test_single_line_comment() {
let toks = tokens("// this is a comment\nfoo");
assert_eq!(toks[0].kind, TokenKind::SingleLineComment);
assert_eq!(toks[0].value, TokenValue::Str(" this is a comment".into()));
assert_eq!(toks[1].kind, TokenKind::Identifier);
assert!(toks[1].had_line_terminator_before);
}
#[test]
fn test_block_comment() {
let toks = tokens("/* block */ foo");
assert_eq!(toks[0].kind, TokenKind::MultiLineComment);
assert_eq!(toks[0].value, TokenValue::Str(" block ".into()));
assert_eq!(toks[1].kind, TokenKind::Identifier);
}
#[test]
fn test_block_comment_with_line_terminator() {
let toks = tokens("/* line1\nline2 */ foo");
assert_eq!(toks[0].kind, TokenKind::MultiLineComment);
assert!(toks[0].had_line_terminator_before || toks[0].kind == TokenKind::MultiLineComment);
}
#[test]
fn test_punctuators_single() {
let toks = kinds("{ } ( ) [ ] . ; , ~ : ?");
assert_eq!(
toks,
vec![
TokenKind::LeftBrace,
TokenKind::RightBrace,
TokenKind::LeftParen,
TokenKind::RightParen,
TokenKind::LeftBracket,
TokenKind::RightBracket,
TokenKind::Dot,
TokenKind::Semicolon,
TokenKind::Comma,
TokenKind::Tilde,
TokenKind::Colon,
TokenKind::Question,
]
);
}
#[test]
fn test_punctuators_comparison() {
let toks = kinds("< > <= >= == != === !==");
assert_eq!(
toks,
vec![
TokenKind::Less,
TokenKind::Greater,
TokenKind::LessEqual,
TokenKind::GreaterEqual,
TokenKind::EqualEqual,
TokenKind::BangEqual,
TokenKind::EqualEqualEqual,
TokenKind::BangEqualEqual,
]
);
}
#[test]
fn test_punctuators_arithmetic() {
let toks = kinds("+ - * ** % ++ --");
assert_eq!(
toks,
vec![
TokenKind::Plus,
TokenKind::Minus,
TokenKind::Star,
TokenKind::StarStar,
TokenKind::Percent,
TokenKind::PlusPlus,
TokenKind::MinusMinus,
]
);
}
#[test]
fn test_punctuators_bitwise() {
let toks = kinds("<< >> >>> & | ^ !");
assert_eq!(
toks,
vec![
TokenKind::LessLess,
TokenKind::GreaterGreater,
TokenKind::GreaterGreaterGreater,
TokenKind::Ampersand,
TokenKind::Pipe,
TokenKind::Caret,
TokenKind::Bang,
]
);
}
#[test]
fn test_punctuators_logical() {
let toks = kinds("&& || ?? ?.");
assert_eq!(
toks,
vec![
TokenKind::AmpersandAmpersand,
TokenKind::PipePipe,
TokenKind::QuestionQuestion,
TokenKind::QuestionDot,
]
);
}
#[test]
fn test_punctuators_assignment() {
let toks = kinds("= += -= *= **= %= <<= >>= >>>= &= |= ^= &&= ||= ??=");
assert_eq!(
toks,
vec![
TokenKind::Equal,
TokenKind::PlusEqual,
TokenKind::MinusEqual,
TokenKind::StarEqual,
TokenKind::StarStarEqual,
TokenKind::PercentEqual,
TokenKind::LessLessEqual,
TokenKind::GreaterGreaterEqual,
TokenKind::GreaterGreaterGreaterEqual,
TokenKind::AmpersandEqual,
TokenKind::PipeEqual,
TokenKind::CaretEqual,
TokenKind::AmpersandAmpersandEqual,
TokenKind::PipePipeEqual,
TokenKind::QuestionQuestionEqual,
]
);
}
#[test]
fn test_punctuators_misc() {
let toks = kinds("=> ...");
assert_eq!(toks, vec![TokenKind::Arrow, TokenKind::DotDotDot,]);
}
#[test]
fn test_line_column_tracking() {
let toks = tokens("x\ny");
assert_eq!(toks[0].span.start.line, 1);
assert_eq!(toks[0].span.start.column, 1);
assert_eq!(toks[1].span.start.line, 2);
assert_eq!(toks[1].span.start.column, 1);
}
#[test]
fn test_crlf_counts_as_one_line() {
let toks = tokens("x\r\ny");
assert_eq!(toks[1].span.start.line, 2);
}
#[test]
fn test_asi_flag_set_on_newline() {
let toks = tokens("x\ny");
assert!(!toks[0].had_line_terminator_before);
assert!(toks[1].had_line_terminator_before);
}
#[test]
fn test_asi_flag_not_set_same_line() {
let toks = tokens("x y");
assert!(!toks[0].had_line_terminator_before);
assert!(!toks[1].had_line_terminator_before);
}
#[test]
fn test_tokenize_let_declaration() {
let toks = kinds("let x = 42;");
assert_eq!(
toks,
vec![
TokenKind::Let,
TokenKind::Identifier,
TokenKind::Equal,
TokenKind::NumericLiteral,
TokenKind::Semicolon,
]
);
}
#[test]
fn test_tokenize_function() {
let toks = kinds("function add(a, b) { return a + b; }");
assert_eq!(
toks,
vec![
TokenKind::Function,
TokenKind::Identifier,
TokenKind::LeftParen,
TokenKind::Identifier,
TokenKind::Comma,
TokenKind::Identifier,
TokenKind::RightParen,
TokenKind::LeftBrace,
TokenKind::Return,
TokenKind::Identifier,
TokenKind::Plus,
TokenKind::Identifier,
TokenKind::Semicolon,
TokenKind::RightBrace,
]
);
}
#[test]
fn test_tokenize_arrow_function() {
let toks = kinds("const f = (x) => x * 2;");
assert_eq!(
toks,
vec![
TokenKind::Const,
TokenKind::Identifier,
TokenKind::Equal,
TokenKind::LeftParen,
TokenKind::Identifier,
TokenKind::RightParen,
TokenKind::Arrow,
TokenKind::Identifier,
TokenKind::Star,
TokenKind::NumericLiteral,
TokenKind::Semicolon,
]
);
}
#[test]
fn test_tokenize_class() {
let toks = kinds("class Foo extends Bar {}");
assert_eq!(
toks,
vec![
TokenKind::Class,
TokenKind::Identifier,
TokenKind::Extends,
TokenKind::Identifier,
TokenKind::LeftBrace,
TokenKind::RightBrace,
]
);
}
#[test]
fn test_tokenize_import_export() {
let toks = kinds("import { x } from 'mod';");
assert_eq!(
toks,
vec![
TokenKind::Import,
TokenKind::LeftBrace,
TokenKind::Identifier,
TokenKind::RightBrace,
TokenKind::From,
TokenKind::StringLiteral,
TokenKind::Semicolon,
]
);
}
#[test]
fn test_tokenize_optional_chain() {
let toks = kinds("obj?.prop");
assert_eq!(
toks,
vec![
TokenKind::Identifier,
TokenKind::QuestionDot,
TokenKind::Identifier,
]
);
}
#[test]
fn test_error_unterminated_block_comment() {
let result = Scanner::tokenize_all("/* oops");
assert!(result.is_err());
}
#[test]
fn test_error_unterminated_template() {
let result = Scanner::tokenize_all("`oops");
assert!(result.is_err());
}
#[test]
fn test_error_unterminated_regexp() {
let result = Scanner::tokenize_all("/oops");
assert!(result.is_err());
}
#[test]
fn test_decode_unicode_escapes_4digit() {
assert_eq!(decode_unicode_escapes(r"\u0041"), "A");
assert_eq!(decode_unicode_escapes(r"\u0042\u0043"), "BC");
assert_eq!(decode_unicode_escapes(r"pre\u0041suf"), "preAsuf");
}
#[test]
fn test_decode_unicode_escapes_braced() {
assert_eq!(decode_unicode_escapes(r"\u{41}"), "A");
assert_eq!(decode_unicode_escapes(r"\u{0041}"), "A");
assert_eq!(decode_unicode_escapes(r"\u{1F600}"), "\u{1F600}");
}
#[test]
fn test_decode_unicode_escapes_no_escapes() {
assert_eq!(decode_unicode_escapes("hello"), "hello");
assert_eq!(decode_unicode_escapes(""), "");
}
#[test]
fn test_cook_template_raw_simple() {
assert_eq!(cook_template_raw("hello world"), Some("hello world".into()));
}
#[test]
fn test_cook_template_raw_unicode_4digit() {
assert_eq!(cook_template_raw(r"\u0041"), Some("A".into()));
}
#[test]
fn test_cook_template_raw_unicode_braced() {
assert_eq!(cook_template_raw(r"\u{42}"), Some("B".into()));
}
#[test]
fn test_cook_template_raw_hex_escape() {
assert_eq!(cook_template_raw(r"\x41"), Some("A".into()));
}
#[test]
fn test_cook_template_raw_standard_escapes() {
assert_eq!(cook_template_raw(r"\n\t\r"), Some("\n\t\r".into()));
assert_eq!(cook_template_raw(r"\\"), Some("\\".into()));
assert_eq!(cook_template_raw(r"\`"), Some("`".into()));
}
#[test]
fn test_cook_template_raw_invalid_returns_none() {
assert_eq!(cook_template_raw(r"\1"), None);
assert_eq!(cook_template_raw(r"\xG"), None);
assert_eq!(cook_template_raw(r"\u00G"), None);
assert_eq!(cook_template_raw(r"\unicode"), None);
}
#[test]
fn test_identifier_unicode_escape_decoded() {
let toks = tokens(r"\u0041");
assert_eq!(toks[0].kind, TokenKind::Identifier);
assert_eq!(toks[0].value, TokenValue::Str("A".into()));
}
#[test]
fn test_identifier_unicode_escape_braced_decoded() {
let toks = tokens(r"\u{42}");
assert_eq!(toks[0].kind, TokenKind::Identifier);
assert_eq!(toks[0].value, TokenValue::Str("B".into()));
}
#[test]
fn test_identifier_mixed_unicode_escape() {
let toks = tokens(r"h\u0065llo");
assert_eq!(toks[0].kind, TokenKind::Identifier);
assert_eq!(toks[0].value, TokenValue::Str("hello".into()));
}
}