mod token;
use std::error::Error;
use std::fmt;
use std::i64;
use std::iter::Peekable;
use std::str::CharIndices;
pub use self::token::Token;
/// A spanned lexer item: `Ok((start, token, end))` or `Err(error)`.
/// The generic parameter names deliberately shadow the concrete types of
/// the same name; see the `Iterator` impl below for the instantiation.
type Spanned<Token, Location, Error> = Result<(Location, Token, Location), Error>;
/// The kinds of errors the lexer can report.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum LexicalErrorCode {
/// A `/* ... */` comment ended without a closing `*/`.
ExpectedCommentBlockEnd,
/// A decimal digit was required (e.g. after a bare "-.").
ExpectedDecimalDigit,
/// ".." was seen but the third '.' of "..." was missing.
ExpectedEllipsis,
/// 'e'/'E' (and optional sign) was not followed by exponent digits.
ExpectedFloatExponent,
/// "0x"/"0X" was not followed by a hexadecimal digit.
ExpectedHexadecimalDigit,
/// '-' followed by 'I' did not complete the keyword "Infinity".
ExpectedKeywordInfinity,
/// A string literal ended without a closing '"'.
ExpectedStringLiteralEnd,
}
/// A lexical error: what went wrong and where.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub struct LexicalError {
/// The kind of error that occurred.
pub code: LexicalErrorCode,
/// Offset of the error, as computed by the lexer's position tracking
/// (character counting; matches byte offsets for ASCII input).
pub location: usize,
}
impl fmt::Display for LexicalError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", self.description())
}
}
impl Error for LexicalError {
fn description(&self) -> &str {
match self.code {
LexicalErrorCode::ExpectedCommentBlockEnd => "expected end of comment block",
LexicalErrorCode::ExpectedDecimalDigit => "expected decimal digit",
LexicalErrorCode::ExpectedEllipsis => "expected ellipsis",
LexicalErrorCode::ExpectedFloatExponent => "expected float exponent",
LexicalErrorCode::ExpectedHexadecimalDigit => "expected hexadecimal digit",
LexicalErrorCode::ExpectedKeywordInfinity => "expected \"Infinity\" keyword",
LexicalErrorCode::ExpectedStringLiteralEnd => "expected end of string literal",
}
}
}
/// States of the float-literal state machine in `Lexer::lex_float_literal`.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
enum FloatLexState {
/// Past the '.' and at least one fractional digit.
AfterDecimalPoint,
/// Past 'e'/'E' and at least one exponent digit.
AfterExponentBase,
/// Still in the integer part; '.' or 'e'/'E' not yet seen.
BeforeDecimalPoint,
/// Just consumed the '.'; no fractional digit seen yet.
ImmediatelyAfterDecimalPoint,
/// Just consumed 'e'/'E'; no sign or digit seen yet.
ImmediatelyAfterExponentBase,
/// Just consumed the exponent's '+'/'-'; no exponent digit seen yet.
ImmediatelyAfterExponentSign,
}
/// Convenience constructor for a `LexicalError` with the given code at
/// the given offset.
fn create_error(code: LexicalErrorCode, location: usize) -> LexicalError {
LexicalError { code, location }
}
/// A streaming lexer over `input`, yielding `Spanned` tokens via the
/// `Iterator` impl below.
#[derive(Clone, Debug)]
pub struct Lexer<'input> {
/// Peekable `(byte_offset, char)` iterator over `input`.
chars: Peekable<CharIndices<'input>>,
/// The original source text (not read by any method in this file).
input: &'input str,
}
impl<'input> Lexer<'input> {
/// Creates a lexer over `input`, positioned at the first character.
pub fn new(input: &'input str) -> Self {
    let chars = input.char_indices().peekable();
    Lexer { chars, input }
}
/// Core tokenizer: returns the next spanned token, a lexical error, or
/// `None` at end of input.
///
/// Whitespace is skipped and comments are consumed without producing
/// tokens. Spans are `(start, end)` offsets; `start` comes from
/// `char_indices`, `end` is computed by character counting (these agree
/// for ASCII input).
#[rustfmt::skip]
#[allow(unknown_lints)]
#[allow(clippy::cognitive_complexity)]
fn get_next_token(&mut self) -> Option<<Self as Iterator>::Item> {
    loop {
        return match self.chars.next() {
            // Insignificant whitespace: restart the loop.
            Some((_, '\t')) |
            Some((_, '\n')) |
            Some((_, '\r')) |
            Some((_, ' ')) => continue,
            Some((start, '_')) => {
                match self.chars.peek() {
                    // '_' followed by a letter begins an identifier.
                    Some(&(_, c @ 'A'...'Z')) |
                    Some(&(_, c @ 'a'...'z')) => {
                        let mut identifier = "_".to_string();
                        identifier.push(c);
                        self.chars.next();
                        return self.lex_identifier_or_keyword(start, start + 2, identifier);
                    }
                    // A lone underscore is just an "other" character.
                    _ => Some(Ok((start, Token::OtherLiteral('_'), start + 1)))
                }
            }
            Some((start, c @ 'A'...'Z')) |
            Some((start, c @ 'a'...'z')) => {
                return self.lex_identifier_or_keyword(start, start + 1, c.to_string())
            }
            Some((start, c @ '0'...'9')) => {
                return self.lex_integer_or_float_literal(start, start, "".to_string(), c)
            }
            Some((start, '"')) => return self.lex_string(start),
            // Single-character punctuation tokens.
            Some((start, ':')) => return Some(Ok((start, Token::Colon, start + 1))),
            Some((start, ',')) => return Some(Ok((start, Token::Comma, start + 1))),
            Some((start, '=')) => return Some(Ok((start, Token::Equals, start + 1))),
            Some((start, '>')) => return Some(Ok((start, Token::GreaterThan, start + 1))),
            Some((start, '{')) => return Some(Ok((start, Token::LeftBrace, start + 1))),
            Some((start, '[')) => return Some(Ok((start, Token::LeftBracket, start + 1))),
            Some((start, '(')) => return Some(Ok((start, Token::LeftParenthesis, start + 1))),
            Some((start, '<')) => return Some(Ok((start, Token::LessThan, start + 1))),
            Some((start, '?')) => return Some(Ok((start, Token::QuestionMark, start + 1))),
            Some((start, '}')) => return Some(Ok((start, Token::RightBrace, start + 1))),
            Some((start, ']')) => return Some(Ok((start, Token::RightBracket, start + 1))),
            Some((start, ')')) => return Some(Ok((start, Token::RightParenthesis, start + 1))),
            Some((start, ';')) => return Some(Ok((start, Token::Semicolon, start + 1))),
            Some((start, '/')) => {
                match self.chars.peek() {
                    // "//" line comment: consume to end of line, continue.
                    Some(&(_, '/')) => {
                        self.lex_line_comment();
                        continue;
                    }
                    // "/*" block comment: consume it; an unterminated
                    // comment produces an error.
                    Some(&(_, '*')) => {
                        match self.lex_block_comment(start) {
                            Some(error) => return Some(error),
                            None => continue
                        }
                    }
                    _ => return Some(Ok((start, Token::OtherLiteral('/'), start + 1)))
                }
            }
            Some((start, '.')) => {
                match self.chars.peek() {
                    // ".." must complete to an ellipsis "...".
                    Some(&(_, '.')) => return self.lex_ellipsis(start),
                    // ".5"-style float literal.
                    Some(&(_, c @ '0'...'9')) => {
                        let mut float_literal = '.'.to_string();
                        float_literal.push(c);
                        self.chars.next();
                        return self.lex_float_literal(
                            start,
                            start + 2,
                            float_literal,
                            FloatLexState::ImmediatelyAfterDecimalPoint);
                    }
                    _ => return Some(Ok((start, Token::Period, start + 1))),
                }
            }
            Some((start, '-')) => {
                match self.chars.peek() {
                    // "-Infinity".
                    Some(&(_, 'I')) => return self.lex_negative_infinity(start),
                    // "-.5"-style float literal.
                    Some(&(_, '.')) => {
                        self.chars.next();
                        return self.lex_float_literal(
                            start,
                            start + 2,
                            "-.".to_string(),
                            FloatLexState::ImmediatelyAfterDecimalPoint);
                    }
                    // Negative integer or float literal.
                    Some(&(_, c @ '0'...'9')) => {
                        self.chars.next();
                        return self.lex_integer_or_float_literal(start,
                                                                 start + 1,
                                                                 "-".to_string(),
                                                                 c);
                    }
                    // Bare hyphen. (Fix: the end offset was previously
                    // hard-coded to `1`, which was only correct for a
                    // hyphen at the very start of the input.)
                    _ => return Some(Ok((start, Token::Hyphen, start + 1)))
                }
            }
            // Anything else is passed through as an "other" literal.
            Some((start, c)) => Some(Ok((start, Token::OtherLiteral(c), start + 1))),
            None => return None,
        };
    }
}
/// Consumes a `/* ... */` block comment. The leading '/' (at `start`)
/// has already been consumed and '*' is the next character.
///
/// Returns `None` when the comment terminates properly, or
/// `Some(Err(..))` whose location points just past the last consumed
/// character when the input ends before `*/`.
fn lex_block_comment(&mut self, start: usize) -> Option<<Self as Iterator>::Item> {
    // Consume the '*' that opens the comment.
    self.chars.next();
    let mut previous = start + 1;
    loop {
        previous += 1;
        match self.chars.next() {
            Some((_, '*')) => match self.chars.next() {
                Some((_, '/')) => break,
                Some(_) => {
                    // Fix: two characters were consumed here ('*' plus the
                    // one after it), so advance the position by one more to
                    // keep subsequently reported error locations accurate.
                    previous += 1;
                    continue;
                }
                None => {
                    return Some(Err(create_error(
                        LexicalErrorCode::ExpectedCommentBlockEnd,
                        previous + 1,
                    )))
                }
            },
            Some(_) => continue,
            None => {
                return Some(Err(create_error(
                    LexicalErrorCode::ExpectedCommentBlockEnd,
                    previous,
                )))
            }
        }
    }
    None
}
/// Completes an ellipsis token. The first '.' (at `start`) has already
/// been consumed and a second '.' has been peeked by the caller.
fn lex_ellipsis(&mut self, start: usize) -> Option<<Self as Iterator>::Item> {
    // Consume the second '.'.
    self.chars.next();
    // A third '.' completes "..."; anything else is an error just past
    // the two dots we consumed.
    if let Some(&(_, '.')) = self.chars.peek() {
        self.chars.next();
        return Some(Ok((start, Token::Ellipsis, start + 3)));
    }
    Some(Err(create_error(
        LexicalErrorCode::ExpectedEllipsis,
        start + 2,
    )))
}
/// Continues lexing a float literal from `float_lex_state`, with the
/// characters consumed so far accumulated in `float_literal`.
///
/// Drives a state machine over the float grammar
/// (digits ['.'] digits ['e'|'E' ['+'|'-'] digits]); on success returns
/// `Token::FloatLiteral` spanning `start..offset`, on failure an error
/// for a malformed exponent or a bare "-." with no digits.
fn lex_float_literal(
&mut self,
start: usize,
mut offset: usize,
mut float_literal: String,
mut float_lex_state: FloatLexState,
) -> Option<<Self as Iterator>::Item> {
loop {
match float_lex_state {
// Still reading integer-part digits; reached only from octal-looking
// literals (e.g. "099.") that turned out to be floats.
FloatLexState::BeforeDecimalPoint => match self.chars.peek() {
Some(&(_, c @ '0'...'9')) => {
self.push_next_char(&mut float_literal, c, &mut offset)
}
// Exponent base is normalized to lowercase 'e' for parsing.
Some(&(_, 'e')) | Some(&(_, 'E')) => {
self.push_next_char(&mut float_literal, 'e', &mut offset);
float_lex_state = FloatLexState::ImmediatelyAfterExponentBase;
}
Some(&(_, '.')) => {
self.push_next_char(&mut float_literal, '.', &mut offset);
float_lex_state = FloatLexState::ImmediatelyAfterDecimalPoint;
}
// Callers only enter this state after looking ahead and finding a
// '.', so running out of digits here is a caller bug.
_ => panic!(
"Integer literals should not be\
 able to be lexed as float literals"
),
},
FloatLexState::ImmediatelyAfterDecimalPoint => match self.chars.peek() {
Some(&(_, c @ '0'...'9')) => {
self.push_next_char(&mut float_literal, c, &mut offset);
float_lex_state = FloatLexState::AfterDecimalPoint;
}
Some(&(_, 'e')) | Some(&(_, 'E')) => {
// Guard against a bare "." followed by an exponent; callers
// always seed `float_literal` with at least two characters.
if float_literal.chars().count() == 1 {
panic!(
"A leading decimal point followed by\
 an exponent should not be possible"
);
}
self.push_next_char(&mut float_literal, 'e', &mut offset);
float_lex_state = FloatLexState::ImmediatelyAfterExponentBase;
}
// "-." with no digit on either side is an error.
_ => if float_literal.starts_with("-.") {
return Some(Err(create_error(
LexicalErrorCode::ExpectedDecimalDigit,
offset,
)))
}
_ => {
// Trailing '.' is allowed (e.g. "5." parses as 5.0).
return Some(Ok((
start,
Token::FloatLiteral(float_literal.parse().unwrap()),
offset,
)));
}
},
FloatLexState::AfterDecimalPoint => match self.chars.peek() {
Some(&(_, c @ '0'...'9')) => {
self.push_next_char(&mut float_literal, c, &mut offset);
}
Some(&(_, 'e')) | Some(&(_, 'E')) => {
self.push_next_char(&mut float_literal, 'e', &mut offset);
float_lex_state = FloatLexState::ImmediatelyAfterExponentBase;
}
_ => {
return Some(Ok((
start,
Token::FloatLiteral(float_literal.parse().unwrap()),
offset,
)));
}
},
FloatLexState::ImmediatelyAfterExponentBase => match self.chars.peek() {
Some(&(_, c @ '0'...'9')) => {
self.push_next_char(&mut float_literal, c, &mut offset);
float_lex_state = FloatLexState::AfterExponentBase;
}
Some(&(_, c @ '+')) | Some(&(_, c @ '-')) => {
self.push_next_char(&mut float_literal, c, &mut offset);
float_lex_state = FloatLexState::ImmediatelyAfterExponentSign;
}
// 'e'/'E' must be followed by a sign or digit.
_ => {
return Some(Err(create_error(
LexicalErrorCode::ExpectedFloatExponent,
offset,
)))
}
},
FloatLexState::ImmediatelyAfterExponentSign => match self.chars.peek() {
Some(&(_, c @ '0'...'9')) => {
self.push_next_char(&mut float_literal, c, &mut offset);
float_lex_state = FloatLexState::AfterExponentBase;
}
// A sign with no digits after it is an error.
_ => {
return Some(Err(create_error(
LexicalErrorCode::ExpectedFloatExponent,
offset,
)))
}
},
FloatLexState::AfterExponentBase => match self.chars.peek() {
Some(&(_, c @ '0'...'9')) => {
self.push_next_char(&mut float_literal, c, &mut offset);
}
_ => {
return Some(Ok((
start,
Token::FloatLiteral(float_literal.parse().unwrap()),
offset,
)));
}
},
}
}
}
/// Lexes a hexadecimal integer literal. On entry the '0' has been
/// consumed, 'x'/'X' has been peeked, and `hexadecimal_literal` holds
/// any sign prefix ("" or "-"). `offset` points at the '0'.
fn lex_hexadecimal_literal(
    &mut self,
    start: usize,
    mut offset: usize,
    mut hexadecimal_literal: String,
) -> Option<<Self as Iterator>::Item> {
    // Account for "0x" and consume the 'x'/'X'.
    offset += 2;
    self.chars.next();
    // At least one hexadecimal digit must follow the prefix.
    match self.chars.peek() {
        Some(&(_, first)) if first.is_digit(16) => {}
        _ => {
            return Some(Err(create_error(
                LexicalErrorCode::ExpectedHexadecimalDigit,
                offset,
            )))
        }
    }
    // Accumulate digits until the first non-hex character.
    loop {
        match self.chars.peek() {
            Some(&(_, digit)) if digit.is_digit(16) => {
                self.push_next_char(&mut hexadecimal_literal, digit, &mut offset);
            }
            _ => {
                let token = to_integer_literal(&hexadecimal_literal, 16);
                return Some(Ok((start, token, offset)));
            }
        }
    }
}
fn lex_identifier(&mut self, offset: &mut usize, mut identifier: &mut String) {
loop {
match self.chars.peek() {
Some(&(_, c @ 'A'...'Z'))
| Some(&(_, c @ 'a'...'z'))
| Some(&(_, c @ '0'...'9'))
| Some(&(_, c @ '_'))
| Some(&(_, c @ '-')) => {
self.push_next_char(&mut identifier, c, offset);
}
_ => break,
}
}
}
/// Finishes lexing an identifier whose first character(s) are already in
/// `identifier`, then maps it to a keyword token if it matches one,
/// otherwise to `Token::Identifier`.
fn lex_identifier_or_keyword(
&mut self,
start: usize,
mut offset: usize,
mut identifier: String,
) -> Option<<Self as Iterator>::Item> {
// Consume the rest of the identifier characters.
self.lex_identifier(&mut offset, &mut identifier);
// Keyword table: exact matches become dedicated tokens.
let token = match &*identifier {
"ArrayBuffer" => Token::ArrayBuffer,
"ByteString" => Token::ByteString,
"DataView" => Token::DataView,
"DOMString" => Token::DOMString,
"Error" => Token::Error,
"Float32Array" => Token::Float32Array,
"Float64Array" => Token::Float64Array,
"FrozenArray" => Token::FrozenArray,
"Infinity" => Token::PositiveInfinity,
"Int16Array" => Token::Int16Array,
"Int32Array" => Token::Int32Array,
"Int8Array" => Token::Int8Array,
"NaN" => Token::NaN,
"Promise" => Token::Promise,
"USVString" => Token::USVString,
"Uint16Array" => Token::Uint16Array,
"Uint32Array" => Token::Uint32Array,
"Uint8Array" => Token::Uint8Array,
"Uint8ClampedArray" => Token::Uint8ClampedArray,
"any" => Token::Any,
"attribute" => Token::Attribute,
"boolean" => Token::Boolean,
"byte" => Token::Byte,
"callback" => Token::Callback,
"const" => Token::Const,
"deleter" => Token::Deleter,
"dictionary" => Token::Dictionary,
"double" => Token::Double,
"enum" => Token::Enum,
"false" => Token::False,
"float" => Token::Float,
"getter" => Token::Getter,
"implements" => Token::Implements,
"includes" => Token::Includes,
"inherit" => Token::Inherit,
"interface" => Token::Interface,
"iterable" => Token::Iterable,
"legacycaller" => Token::LegacyCaller,
"long" => Token::Long,
"maplike" => Token::Maplike,
"mixin" => Token::Mixin,
"namespace" => Token::Namespace,
"null" => Token::Null,
"object" => Token::Object,
"octet" => Token::Octet,
"optional" => Token::Optional,
"or" => Token::Or,
"partial" => Token::Partial,
"readonly" => Token::ReadOnly,
"record" => Token::Record,
"required" => Token::Required,
"sequence" => Token::Sequence,
"setlike" => Token::Setlike,
"setter" => Token::Setter,
"short" => Token::Short,
"static" => Token::Static,
"stringifier" => Token::Stringifier,
"symbol" => Token::Symbol,
"typedef" => Token::Typedef,
"true" => Token::True,
"unrestricted" => Token::Unrestricted,
"unsigned" => Token::Unsigned,
"void" => Token::Void,
// A leading '_' escapes keywords; it is stripped from the
// resulting identifier (e.g. "_interface" -> "interface").
_ => if identifier.starts_with('_') {
Token::Identifier(identifier.split_at(1).1.to_string())
} else {
Token::Identifier(identifier)
},
};
Some(Ok((start, token, offset)))
}
/// Lexes a numeric literal whose first digit `c` has already been
/// consumed; `literal` holds any prefix lexed so far ("" or "-").
///
/// Dispatches between decimal, octal (leading "0") and hexadecimal
/// ("0x"/"0X") integers, handing off to `lex_float_literal` as soon as a
/// '.' or an exponent marker is seen.
fn lex_integer_or_float_literal(
    &mut self,
    start: usize,
    mut offset: usize,
    mut literal: String,
    c: char,
) -> Option<<Self as Iterator>::Item> {
    match c {
        '0' => match self.chars.peek() {
            // "0x"/"0X" introduces a hexadecimal literal.
            Some(&(_, 'x')) | Some(&(_, 'X')) => {
                self.lex_hexadecimal_literal(start, offset, literal)
            }
            Some(&(_, c @ '0'...'9')) => {
                // A digit after the leading zero: octal, unless a later
                // decimal point turns the whole thing into a float. The
                // leading '0' is deliberately left out of `literal` so
                // radix-8 parsing sees only the significant digits.
                offset += 2;
                literal.push(c);
                if c > '7' {
                    // '8'/'9' are invalid octal digits. Unless a decimal
                    // point follows, emit the leading zero as the integer
                    // 0 and leave this digit (still unconsumed) for the
                    // next token.
                    if !self.lookahead_for_decimal_point() {
                        return Some(Ok((start, Token::SignedIntegerLiteral(0), offset - 1)));
                    }
                    self.chars.next();
                    return self.lex_float_literal(
                        start,
                        offset,
                        literal,
                        FloatLexState::BeforeDecimalPoint,
                    );
                }
                self.chars.next();
                loop {
                    match self.chars.peek() {
                        Some(&(_, c @ '0'...'9')) => {
                            if c > '7' {
                                // Same '8'/'9' handling as above, but
                                // mid-literal: finish the octal integer or
                                // continue as a float.
                                if !self.lookahead_for_decimal_point() {
                                    let token = to_integer_literal(&literal, 8);
                                    return Some(Ok((start, token, offset)));
                                }
                                self.push_next_char(&mut literal, c, &mut offset);
                                return self.lex_float_literal(
                                    start,
                                    offset,
                                    literal,
                                    FloatLexState::BeforeDecimalPoint,
                                );
                            }
                            // `c` is a valid octal digit (0-7) here; a
                            // second `c > '7'` re-check that followed this
                            // push was unreachable and has been removed.
                            self.push_next_char(&mut literal, c, &mut offset);
                        }
                        Some(&(_, '.')) => {
                            self.push_next_char(&mut literal, '.', &mut offset);
                            return self.lex_float_literal(
                                start,
                                offset,
                                literal,
                                FloatLexState::ImmediatelyAfterDecimalPoint,
                            );
                        }
                        Some(&(_, 'e')) | Some(&(_, 'E')) => {
                            self.push_next_char(&mut literal, 'e', &mut offset);
                            return self.lex_float_literal(
                                start,
                                offset,
                                literal,
                                FloatLexState::ImmediatelyAfterExponentBase,
                            );
                        }
                        _ => {
                            let token = to_integer_literal(&literal, 8);
                            return Some(Ok((start, token, offset)));
                        }
                    }
                }
            }
            // "0." starts a float literal.
            Some(&(_, '.')) => {
                self.chars.next();
                self.lex_float_literal(
                    start,
                    start + 2,
                    "0.".to_string(),
                    FloatLexState::ImmediatelyAfterDecimalPoint,
                )
            }
            // "0e"/"0E" starts a float literal with an exponent.
            Some(&(_, 'e')) | Some(&(_, 'E')) => {
                self.chars.next();
                self.lex_float_literal(
                    start,
                    start + 2,
                    "0e".to_string(),
                    FloatLexState::ImmediatelyAfterExponentBase,
                )
            }
            // A lone zero.
            _ => Some(Ok((start, Token::SignedIntegerLiteral(0), start + 1))),
        },
        c => {
            // Decimal literal: accumulate digits, switching to float
            // lexing on '.', 'e' or 'E'.
            literal.push(c);
            offset += 1;
            loop {
                match self.chars.peek() {
                    Some(&(_, c @ '0'...'9')) => {
                        self.push_next_char(&mut literal, c, &mut offset)
                    }
                    Some(&(_, '.')) => {
                        self.push_next_char(&mut literal, '.', &mut offset);
                        return self.lex_float_literal(
                            start,
                            offset,
                            literal,
                            FloatLexState::ImmediatelyAfterDecimalPoint,
                        );
                    }
                    Some(&(_, 'e')) | Some(&(_, 'E')) => {
                        self.push_next_char(&mut literal, 'e', &mut offset);
                        return self.lex_float_literal(
                            start,
                            offset,
                            literal,
                            FloatLexState::ImmediatelyAfterExponentBase,
                        );
                    }
                    _ => {
                        let token = to_integer_literal(&literal, 10);
                        return Some(Ok((start, token, offset)));
                    }
                }
            }
        }
    }
}
/// Consumes a "//" line comment up to (but not including) the newline,
/// so the newline is handled by the normal whitespace skipping.
fn lex_line_comment(&mut self) {
    loop {
        match self.chars.peek() {
            Some(&(_, '\n')) | None => break,
            Some(_) => {
                self.chars.next();
            }
        }
    }
}
/// Lexes "-Infinity". The '-' (at `start`) has been consumed and 'I' has
/// been peeked by the caller.
fn lex_negative_infinity(&mut self, start: usize) -> Option<<Self as Iterator>::Item> {
// Look ahead at the next 8 characters on a clone, without consuming.
let infinity = self
.chars
.clone()
.take(8)
.map(|(_, c)| c)
.collect::<String>();
if infinity == "Infinity" {
// Consume the keyword; the span covers '-' plus 8 keyword chars.
for _ in 0..8 {
self.chars.next();
}
Some(Ok((start, Token::NegativeInfinity, start + 9)))
} else {
let char_count = infinity.chars().count();
// NOTE(review): this consumes `8 - char_count` characters, which is
// zero when 8 non-matching characters were available (so lexing then
// resumes at the 'I'); `0..char_count` may have been intended —
// confirm the desired error-recovery behavior.
for _ in char_count..8 {
self.chars.next();
}
Some(Err(LexicalError {
code: LexicalErrorCode::ExpectedKeywordInfinity,
location: start + char_count,
}))
}
}
/// Lexes a string literal; the opening '"' (at `start`) has already been
/// consumed. Characters are taken verbatim up to the closing '"' — there
/// are no escape sequences.
fn lex_string(&mut self, start: usize) -> Option<<Self as Iterator>::Item> {
    let mut contents = String::new();
    let mut position = start;
    loop {
        // `position` tracks the offset of the character consumed below.
        position += 1;
        let next = self.chars.next();
        if let Some((_, c)) = next {
            if c == '"' {
                // Span covers both quote characters.
                return Some(Ok((start, Token::StringLiteral(contents), position + 1)));
            }
            contents.push(c);
        } else {
            // Ran out of input before the closing quote.
            return Some(Err(create_error(
                LexicalErrorCode::ExpectedStringLiteralEnd,
                position + 1,
            )));
        }
    }
}
/// Looks ahead (without consuming anything) to see whether the current
/// run of digits is followed by a decimal point — i.e. whether an
/// octal-looking literal is actually a float.
fn lookahead_for_decimal_point(&mut self) -> bool {
    for (_, c) in self.chars.clone() {
        match c {
            '0'...'9' => {}
            '.' => return true,
            _ => return false,
        }
    }
    false
}
/// Commits a previously peeked character: advances the iterator, bumps
/// the offset, and appends the character to the token being built.
fn push_next_char(&mut self, token: &mut String, next_char: char, offset: &mut usize) {
    self.chars.next();
    *offset += 1;
    token.push(next_char);
}
}
impl<'input> Iterator for Lexer<'input> {
// Each item is a `(start, token, end)` span or a lexical error.
type Item = Spanned<Token, usize, LexicalError>;
// Delegates to the core tokenizer; `None` means end of input.
fn next(&mut self) -> Option<Self::Item> {
self.get_next_token()
}
}
/// Parses `literal` in the given radix, preferring a signed token and
/// falling back to unsigned for values that only fit in `u64`
/// (e.g. "18446744073709551615").
///
/// # Panics
///
/// Panics if the value fits in neither `i64` nor `u64`; a negative
/// value below `i64::MIN` (e.g. "-18446744073709551615") would trigger
/// the `unwrap` on the `u64` fallback.
fn to_integer_literal(literal: &str, radix: u32) -> Token {
    match i64::from_str_radix(literal, radix) {
        Ok(signed) => Token::SignedIntegerLiteral(signed),
        Err(_) => Token::UnsignedIntegerLiteral(u64::from_str_radix(literal, radix).unwrap()),
    }
}
#[cfg(test)]
mod test {
// End-to-end lexer tests: each test lexes a source string to completion
// and compares the full stream of spanned tokens/errors.
use super::*;
// Lexes `input` and asserts the complete result stream equals `expected`.
fn assert_lex(input: &str, expected: Vec<Spanned<Token, usize, LexicalError>>) {
let lexer = Lexer::new(input);
assert_eq!(lexer.collect::<Vec<_>>(), expected);
}
#[test]
fn lex_colon() {
assert_lex(":", vec![Ok((0, Token::Colon, 1))]);
}
#[test]
fn lex_comma() {
assert_lex(",", vec![Ok((0, Token::Comma, 1))]);
}
#[test]
fn lex_comment() {
assert_lex("// this is a comment", vec![]);
assert_lex("/* this is a comment */", vec![]);
// Unterminated block comments report the offset just past the input.
assert_lex(
"/* this is a comment",
vec![Err(create_error(
LexicalErrorCode::ExpectedCommentBlockEnd,
20,
))],
);
assert_lex(
"/* this is a comment*",
vec![Err(create_error(
LexicalErrorCode::ExpectedCommentBlockEnd,
21,
))],
);
}
#[test]
fn lex_ellipsis() {
assert_lex("...", vec![Ok((0, Token::Ellipsis, 3))]);
assert_lex(
"..",
vec![Err(create_error(LexicalErrorCode::ExpectedEllipsis, 2))],
);
}
#[test]
fn lex_equals() {
assert_lex("=", vec![Ok((0, Token::Equals, 1))]);
}
#[test]
fn lex_float_literal() {
// Leading zeros force otherwise-octal literals to be parsed as
// floats when a '.' or exponent follows.
assert_lex("0.", vec![Ok((0, Token::FloatLiteral(0.0), 2))]);
assert_lex("000051.", vec![Ok((0, Token::FloatLiteral(51.0), 7))]);
assert_lex("05162.", vec![Ok((0, Token::FloatLiteral(5162.0), 6))]);
assert_lex("099.", vec![Ok((0, Token::FloatLiteral(99.0), 4))]);
assert_lex(
"04624.51235",
vec![Ok((0, Token::FloatLiteral(4624.51235), 11))],
);
assert_lex("0.987", vec![Ok((0, Token::FloatLiteral(0.987), 5))]);
assert_lex("0.55e10", vec![Ok((0, Token::FloatLiteral(0.55e10), 7))]);
assert_lex("0612e61", vec![Ok((0, Token::FloatLiteral(612e61), 7))]);
assert_lex("0e-1", vec![Ok((0, Token::FloatLiteral(0e-1), 4))]);
assert_lex("041e+9", vec![Ok((0, Token::FloatLiteral(41e+9), 6))]);
assert_lex(
"021e",
vec![Err(create_error(
LexicalErrorCode::ExpectedFloatExponent,
4,
))],
);
assert_lex(
"01e+",
vec![Err(create_error(
LexicalErrorCode::ExpectedFloatExponent,
4,
))],
);
assert_lex(
"01e-",
vec![Err(create_error(
LexicalErrorCode::ExpectedFloatExponent,
4,
))],
);
assert_lex("5162.", vec![Ok((0, Token::FloatLiteral(5162.0), 5))]);
assert_lex("99.", vec![Ok((0, Token::FloatLiteral(99.0), 3))]);
assert_lex(
"4624.51235",
vec![Ok((0, Token::FloatLiteral(4624.51235), 10))],
);
assert_lex("612e61", vec![Ok((0, Token::FloatLiteral(612e61), 6))]);
assert_lex("41e+9", vec![Ok((0, Token::FloatLiteral(41e+9), 5))]);
assert_lex(
"21e",
vec![Err(create_error(
LexicalErrorCode::ExpectedFloatExponent,
3,
))],
);
assert_lex(
"1e+",
vec![Err(create_error(
LexicalErrorCode::ExpectedFloatExponent,
3,
))],
);
assert_lex(
"1e-",
vec![Err(create_error(
LexicalErrorCode::ExpectedFloatExponent,
3,
))],
);
// Leading-dot and negative float forms.
assert_lex(".5", vec![Ok((0, Token::FloatLiteral(0.5), 2))]);
assert_lex(".612e-10", vec![Ok((0, Token::FloatLiteral(0.612e-10), 8))]);
assert_lex("-700.5", vec![Ok((0, Token::FloatLiteral(-700.5), 6))]);
assert_lex("-9.e2", vec![Ok((0, Token::FloatLiteral(-9.0e2), 5))]);
assert_lex("-.5e1", vec![Ok((0, Token::FloatLiteral(-0.5e1), 5))]);
assert_lex("-.0", vec![Ok((0, Token::FloatLiteral(0.0), 3))]);
assert_lex(
"-.",
vec![Err(create_error(LexicalErrorCode::ExpectedDecimalDigit, 2))],
);
}
#[test]
fn lex_greater_than() {
assert_lex(">", vec![Ok((0, Token::GreaterThan, 1))]);
}
#[test]
fn lex_hyphen() {
assert_lex("-", vec![Ok((0, Token::Hyphen, 1))]);
}
#[test]
fn lex_identifier() {
// A leading '_' is stripped from the identifier text.
assert_lex(
"_identifier",
vec![Ok((0, Token::Identifier("identifier".to_string()), 11))],
);
assert_lex(
"_Identifier",
vec![Ok((0, Token::Identifier("Identifier".to_string()), 11))],
);
assert_lex(
"identifier",
vec![Ok((0, Token::Identifier("identifier".to_string()), 10))],
);
assert_lex(
"Identifier",
vec![Ok((0, Token::Identifier("Identifier".to_string()), 10))],
);
assert_lex(
"z0123",
vec![Ok((0, Token::Identifier("z0123".to_string()), 5))],
);
assert_lex(
"i-d-e_t0123",
vec![Ok((0, Token::Identifier("i-d-e_t0123".to_string()), 11))],
);
}
#[test]
fn lex_integer_literal() {
assert_lex("1", vec![Ok((0, Token::SignedIntegerLiteral(1), 1))]);
assert_lex("9624", vec![Ok((0, Token::SignedIntegerLiteral(9624), 4))]);
assert_lex("-1", vec![Ok((0, Token::SignedIntegerLiteral(-1), 2))]);
assert_lex(
"-9624",
vec![Ok((0, Token::SignedIntegerLiteral(-9624), 5))],
);
assert_lex("0x0", vec![Ok((0, Token::SignedIntegerLiteral(0x0), 3))]);
assert_lex(
"0x1234FF",
vec![Ok((0, Token::SignedIntegerLiteral(0x1234FF), 8))],
);
assert_lex(
"0x",
vec![Err(create_error(
LexicalErrorCode::ExpectedHexadecimalDigit,
2,
))],
);
assert_lex("-0x0", vec![Ok((0, Token::SignedIntegerLiteral(0x0), 4))]);
assert_lex(
"-0x1234FF",
vec![Ok((0, Token::SignedIntegerLiteral(-0x1234FF), 9))],
);
assert_lex(
"-0x",
vec![Err(create_error(
LexicalErrorCode::ExpectedHexadecimalDigit,
3,
))],
);
assert_lex("0", vec![Ok((0, Token::SignedIntegerLiteral(0), 1))]);
assert_lex("0624", vec![Ok((0, Token::SignedIntegerLiteral(0o624), 4))]);
assert_lex(
"-0624",
vec![Ok((0, Token::SignedIntegerLiteral(-0o624), 5))],
);
// '8'/'9' terminate an octal literal and start a new token.
assert_lex(
"08",
vec![
Ok((0, Token::SignedIntegerLiteral(0), 1)),
Ok((1, Token::SignedIntegerLiteral(8), 2)),
],
);
assert_lex(
"01238",
vec![
Ok((0, Token::SignedIntegerLiteral(0o123), 4)),
Ok((4, Token::SignedIntegerLiteral(8), 5)),
],
);
// Boundary cases around i64::MAX / i64::MIN / u64::MAX: values that
// overflow i64 are reported as unsigned literals.
assert_lex(
"18446744073709551615",
vec![Ok((
0,
Token::UnsignedIntegerLiteral(18446744073709551615),
20,
))],
);
assert_lex(
"01777777777777777777777",
vec![Ok((
0,
Token::UnsignedIntegerLiteral(18446744073709551615),
23,
))],
);
assert_lex(
"0xFFFFFFFFFFFFFFFF",
vec![Ok((
0,
Token::UnsignedIntegerLiteral(18446744073709551615),
18,
))],
);
assert_lex(
"9223372036854775807",
vec![Ok((
0,
Token::SignedIntegerLiteral(9223372036854775807),
19,
))],
);
assert_lex(
"9223372036854775808",
vec![Ok((
0,
Token::UnsignedIntegerLiteral(9223372036854775808),
19,
))],
);
assert_lex(
"0777777777777777777777",
vec![Ok((
0,
Token::SignedIntegerLiteral(9223372036854775807),
22,
))],
);
assert_lex(
"01000000000000000000000",
vec![Ok((
0,
Token::UnsignedIntegerLiteral(9223372036854775808),
23,
))],
);
assert_lex(
"0x7FFFFFFFFFFFFFFF",
vec![Ok((
0,
Token::SignedIntegerLiteral(9223372036854775807),
18,
))],
);
assert_lex(
"0x8000000000000000",
vec![Ok((
0,
Token::UnsignedIntegerLiteral(9223372036854775808),
18,
))],
);
assert_lex(
"-9223372036854775808",
vec![Ok((
0,
Token::SignedIntegerLiteral(-9223372036854775808),
20,
))],
);
assert_lex(
"-01000000000000000000000",
vec![Ok((
0,
Token::SignedIntegerLiteral(-9223372036854775808),
24,
))],
);
assert_lex(
"-0x8000000000000000",
vec![Ok((
0,
Token::SignedIntegerLiteral(-9223372036854775808),
19,
))],
);
}
#[test]
fn lex_keyword() {
assert_lex("ArrayBuffer", vec![Ok((0, Token::ArrayBuffer, 11))]);
assert_lex("ByteString", vec![Ok((0, Token::ByteString, 10))]);
assert_lex("DataView", vec![Ok((0, Token::DataView, 8))]);
assert_lex("DOMString", vec![Ok((0, Token::DOMString, 9))]);
assert_lex("Error", vec![Ok((0, Token::Error, 5))]);
assert_lex("Float32Array", vec![Ok((0, Token::Float32Array, 12))]);
assert_lex("Float64Array", vec![Ok((0, Token::Float64Array, 12))]);
assert_lex("FrozenArray", vec![Ok((0, Token::FrozenArray, 11))]);
assert_lex("Infinity", vec![Ok((0, Token::PositiveInfinity, 8))]);
assert_lex("Int16Array", vec![Ok((0, Token::Int16Array, 10))]);
assert_lex("Int32Array", vec![Ok((0, Token::Int32Array, 10))]);
assert_lex("Int8Array", vec![Ok((0, Token::Int8Array, 9))]);
assert_lex("NaN", vec![Ok((0, Token::NaN, 3))]);
assert_lex("Promise", vec![Ok((0, Token::Promise, 7))]);
assert_lex("USVString", vec![Ok((0, Token::USVString, 9))]);
assert_lex("Uint16Array", vec![Ok((0, Token::Uint16Array, 11))]);
assert_lex("Uint32Array", vec![Ok((0, Token::Uint32Array, 11))]);
assert_lex("Uint8Array", vec![Ok((0, Token::Uint8Array, 10))]);
assert_lex(
"Uint8ClampedArray",
vec![Ok((0, Token::Uint8ClampedArray, 17))],
);
assert_lex("any", vec![Ok((0, Token::Any, 3))]);
assert_lex("attribute", vec![Ok((0, Token::Attribute, 9))]);
assert_lex("boolean", vec![Ok((0, Token::Boolean, 7))]);
assert_lex("byte", vec![Ok((0, Token::Byte, 4))]);
assert_lex("callback", vec![Ok((0, Token::Callback, 8))]);
assert_lex("const", vec![Ok((0, Token::Const, 5))]);
assert_lex("deleter", vec![Ok((0, Token::Deleter, 7))]);
assert_lex("dictionary", vec![Ok((0, Token::Dictionary, 10))]);
assert_lex("double", vec![Ok((0, Token::Double, 6))]);
assert_lex("enum", vec![Ok((0, Token::Enum, 4))]);
assert_lex("false", vec![Ok((0, Token::False, 5))]);
assert_lex("float", vec![Ok((0, Token::Float, 5))]);
assert_lex("getter", vec![Ok((0, Token::Getter, 6))]);
assert_lex("implements", vec![Ok((0, Token::Implements, 10))]);
assert_lex("includes", vec![Ok((0, Token::Includes, 8))]);
assert_lex("inherit", vec![Ok((0, Token::Inherit, 7))]);
assert_lex("interface", vec![Ok((0, Token::Interface, 9))]);
assert_lex("iterable", vec![Ok((0, Token::Iterable, 8))]);
assert_lex("legacycaller", vec![Ok((0, Token::LegacyCaller, 12))]);
assert_lex("long", vec![Ok((0, Token::Long, 4))]);
assert_lex("maplike", vec![Ok((0, Token::Maplike, 7))]);
assert_lex("mixin", vec![Ok((0, Token::Mixin, 5))]);
assert_lex("namespace", vec![Ok((0, Token::Namespace, 9))]);
assert_lex("null", vec![Ok((0, Token::Null, 4))]);
assert_lex("object", vec![Ok((0, Token::Object, 6))]);
assert_lex("octet", vec![Ok((0, Token::Octet, 5))]);
assert_lex("optional", vec![Ok((0, Token::Optional, 8))]);
assert_lex("or", vec![Ok((0, Token::Or, 2))]);
assert_lex("partial", vec![Ok((0, Token::Partial, 7))]);
assert_lex("readonly", vec![Ok((0, Token::ReadOnly, 8))]);
assert_lex("record", vec![Ok((0, Token::Record, 6))]);
assert_lex("required", vec![Ok((0, Token::Required, 8))]);
assert_lex("sequence", vec![Ok((0, Token::Sequence, 8))]);
assert_lex("setlike", vec![Ok((0, Token::Setlike, 7))]);
assert_lex("setter", vec![Ok((0, Token::Setter, 6))]);
assert_lex("short", vec![Ok((0, Token::Short, 5))]);
assert_lex("static", vec![Ok((0, Token::Static, 6))]);
assert_lex("stringifier", vec![Ok((0, Token::Stringifier, 11))]);
assert_lex("symbol", vec![Ok((0, Token::Symbol, 6))]);
assert_lex("true", vec![Ok((0, Token::True, 4))]);
assert_lex("typedef", vec![Ok((0, Token::Typedef, 7))]);
assert_lex("unsigned", vec![Ok((0, Token::Unsigned, 8))]);
assert_lex("unrestricted", vec![Ok((0, Token::Unrestricted, 12))]);
assert_lex("void", vec![Ok((0, Token::Void, 4))]);
}
#[test]
fn lex_left_brace() {
assert_lex("{", vec![Ok((0, Token::LeftBrace, 1))]);
}
#[test]
fn lex_left_bracket() {
assert_lex("[", vec![Ok((0, Token::LeftBracket, 1))]);
}
#[test]
fn lex_left_parenthesis() {
assert_lex("(", vec![Ok((0, Token::LeftParenthesis, 1))]);
}
#[test]
fn lex_less_than() {
assert_lex("<", vec![Ok((0, Token::LessThan, 1))]);
}
#[test]
fn lex_negative_infinity() {
assert_lex("-Infinity", vec![Ok((0, Token::NegativeInfinity, 9))]);
assert_lex(
"-Infinity;",
vec![
Ok((0, Token::NegativeInfinity, 9)),
Ok((9, Token::Semicolon, 10)),
],
);
}
#[test]
fn lex_other_literal() {
assert_lex("%", vec![Ok((0, Token::OtherLiteral('%'), 1))]);
assert_lex("/", vec![Ok((0, Token::OtherLiteral('/'), 1))]);
assert_lex("!", vec![Ok((0, Token::OtherLiteral('!'), 1))]);
assert_lex("_", vec![Ok((0, Token::OtherLiteral('_'), 1))]);
}
#[test]
fn lex_period() {
assert_lex(".", vec![Ok((0, Token::Period, 1))]);
}
#[test]
fn lex_question_mark() {
assert_lex("?", vec![Ok((0, Token::QuestionMark, 1))]);
}
#[test]
fn lex_right_brace() {
assert_lex("}", vec![Ok((0, Token::RightBrace, 1))]);
}
#[test]
fn lex_right_bracket() {
assert_lex("]", vec![Ok((0, Token::RightBracket, 1))]);
}
#[test]
fn lex_right_parenthesis() {
assert_lex(")", vec![Ok((0, Token::RightParenthesis, 1))]);
}
#[test]
fn lex_semicolon() {
assert_lex(";", vec![Ok((0, Token::Semicolon, 1))]);
}
#[test]
fn lex_string() {
assert_lex(
r#""this is a string""#,
vec![Ok((
0,
Token::StringLiteral("this is a string".to_string()),
18,
))],
);
assert_lex(
r#""this is a string"#,
vec![Err(create_error(
LexicalErrorCode::ExpectedStringLiteralEnd,
18,
))],
);
}
#[test]
fn lex_whitespace() {
assert_lex(" \n \t", vec![]);
assert_lex("\r\n", vec![]);
}
}