pub(crate) mod operator;
#[cfg(test)]
mod tests;
pub(crate) mod tokens;
use crate::internal::as2::lexer::operator::lex_operator;
use crate::internal::as2::lexer::tokens::{Keyword, QuoteKind, Token, TokenKind};
use crate::internal::span::{FileId, Span};
use winnow::stream::{AsBStr, AsChar, FindSlice, Location, Stream as _};
/// Input type consumed by the lexer: a `&str` wrapped in winnow's
/// `LocatingSlice`, which the lexing functions in this module query for the
/// start/end offsets they store in each token's `Span`.
pub(crate) type Stream<'i> = winnow::stream::LocatingSlice<&'i str>;
/// Tokenizer over a borrowed source string.
///
/// Tokens are produced lazily through the `Iterator` implementation, or all
/// at once with `into_vec`.
pub struct Lexer<'i> {
/// Remaining, not yet tokenized input.
stream: Stream<'i>,
/// Identifier of the source file; copied into the span of every token.
file_id: FileId,
}
impl<'i> Lexer<'i> {
    /// Creates a lexer over `input`; every span it produces carries `file_id`.
    ///
    /// A single leading U+FEFF (byte-order mark) is removed before the input
    /// is handed to the stream.
    pub fn new(input: &'i str, file_id: FileId) -> Self {
        let source = match input.strip_prefix('\u{FEFF}') {
            Some(without_bom) => without_bom,
            None => input,
        };
        Self {
            stream: Stream::new(source),
            file_id,
        }
    }

    /// Consumes the lexer and collects every remaining token into a vector.
    pub fn into_vec(self) -> Vec<Token<'i>> {
        // The remaining input length (in bytes) is used as the capacity
        // estimate, clamped so that `capacity * size_of::<Token>()` stays
        // within `usize`.
        let max_tokens = usize::MAX / size_of::<Token>();
        let remaining_bytes = self.stream.len();
        let mut tokens = Vec::with_capacity(remaining_bytes.min(max_tokens));
        tokens.extend(self);
        tokens
    }
}
impl<'a> Iterator for Lexer<'a> {
    type Item = Token<'a>;

    /// Returns the next token, or `None` once no input bytes remain.
    ///
    /// `process_token` yields `None` for input it merely skips (whitespace,
    /// comments); in that case the loop simply looks at the next byte.
    fn next(&mut self) -> Option<Self::Item> {
        while let Some(&peek_byte) = self.stream.as_bstr().first() {
            let produced = process_token(peek_byte, &mut self.stream, self.file_id);
            if produced.is_some() {
                return produced;
            }
        }
        None
    }
}
/// Dispatches on the first unread byte and lexes whatever starts there.
///
/// Returns `None` when the byte only introduced skipped input (a space, a tab
/// or a comment); otherwise returns the token that was lexed. Any byte without
/// a dedicated arm turns the *entire* remaining input into a single
/// `TokenKind::Unknown` token.
fn process_token<'a>(peek_byte: u8, stream: &mut Stream<'a>, file_id: FileId) -> Option<Token<'a>> {
    let token = match peek_byte {
        // Horizontal whitespace is dropped without producing a token.
        b' ' | b'\t' => {
            stream.next_slice(1);
            return None;
        }
        // Either a comment (skipped, yields `None`) or an operator starting with `/`.
        b'/' => return lex_comment_or_divide(stream, file_id),

        // Single-character punctuation.
        b'(' => lex_ascii_char(stream, TokenKind::OpenParen, file_id),
        b',' => lex_ascii_char(stream, TokenKind::Comma, file_id),
        b')' => lex_ascii_char(stream, TokenKind::CloseParen, file_id),
        b'{' => lex_ascii_char(stream, TokenKind::OpenBrace, file_id),
        b'}' => lex_ascii_char(stream, TokenKind::CloseBrace, file_id),
        b'[' => lex_ascii_char(stream, TokenKind::OpenBracket, file_id),
        b']' => lex_ascii_char(stream, TokenKind::CloseBracket, file_id),
        b';' => lex_ascii_char(stream, TokenKind::Semicolon, file_id),

        b'=' | b'+' | b'-' | b'*' | b'%' | b'&' | b'^' | b'|' | b'~' | b'>' | b'<' | b'!' => {
            lex_operator(stream, file_id)
        }

        // Line endings.
        b'\r' => lex_crlf(stream, file_id),
        b'\n' => lex_ascii_char(stream, TokenKind::Newline, file_id),

        // String literals.
        b'"' => lex_string(stream, QuoteKind::Double, file_id),
        b'\'' => lex_string(stream, QuoteKind::Single, file_id),

        b'?' => lex_ascii_char(stream, TokenKind::Question, file_id),
        b':' => lex_ascii_char(stream, TokenKind::Colon, file_id),

        b'a'..=b'z' | b'A'..=b'Z' | b'_' | b'$' => lex_identifier_or_keyword(stream, file_id),
        // A leading `.` may start a float; the number lexer falls back to a period token.
        b'0'..=b'9' | b'.' => lex_integer_or_float(stream, file_id),
        b'@' => lex_pcode(stream, file_id),
        b'#' => lex_ascii_char(stream, TokenKind::Hash, file_id),

        // Unrecognized byte: swallow everything up to end of input.
        _ => {
            let start = stream.current_token_start();
            let remaining = stream.eof_offset();
            let raw = stream.next_slice(remaining);
            let end = stream.previous_token_end();
            Token::new(
                TokenKind::Unknown,
                Span::new_unchecked(start, end, file_id),
                raw,
            )
        }
    };
    Some(token)
}
/// Handles input starting with `/`.
///
/// `//` and `/*` start comments, which are skipped and yield `None`; anything
/// else is handed to `lex_operator` and its token is returned.
fn lex_comment_or_divide<'a>(stream: &mut Stream<'a>, file_id: FileId) -> Option<Token<'a>> {
    // Look one byte past the `/` that selected this function.
    let second_byte = stream.as_bstr().get(1).copied();
    if second_byte == Some(b'/') {
        skip_line_comment(stream);
        return None;
    }
    if second_byte == Some(b'*') {
        skip_block_comment(stream);
        return None;
    }
    Some(lex_operator(stream, file_id))
}
/// Skips a `//` comment, including the `\n` that ends it.
///
/// If no `\n` follows, the rest of the input is consumed.
///
/// NOTE(review): because the terminating `\n` is consumed here, it is never
/// lexed as its own token — confirm that is what consumers of `Newline`
/// tokens expect.
fn skip_line_comment(stream: &mut Stream<'_>) {
    // Step over the two-byte `//` introducer.
    stream.next_slice(2);
    match stream.as_bstr().find_slice(&b"\n"[..]) {
        Some(newline) => {
            stream.next_slice(newline.end);
        }
        None => {
            stream.finish();
        }
    }
}
/// Skips a `/* ... */` comment, including its closing `*/`.
///
/// An unterminated comment consumes the rest of the input.
fn skip_block_comment(stream: &mut Stream<'_>) {
    // Step over the two-byte `/*` introducer.
    stream.next_slice(2);
    let Some(terminator) = stream.as_bstr().find_slice(&b"*/"[..]) else {
        stream.finish();
        return;
    };
    stream.next_slice(terminator.end);
}
/// Consumes exactly one byte and wraps it in a token of the given `kind`.
fn lex_ascii_char<'a>(stream: &mut Stream<'a>, kind: TokenKind, file_id: FileId) -> Token<'a> {
    let span_start = stream.current_token_start();
    let text = stream.next_slice(1);
    let span_end = stream.previous_token_end();
    Token::new(
        kind,
        Span::new_unchecked(span_start, span_end, file_id),
        text,
    )
}
/// Lexes a numeric literal, or a lone `.` as a period.
///
/// Recognized shapes:
/// * hexadecimal: `0x` (lowercase `x` only) followed by hex digits,
/// * decimal digits, optionally followed by `.` and more digits
///   (`TokenKind::Float` whenever the `.` is present),
/// * an optional exponent: `e`/`E`, optional `+`/`-`, then digits.
///
/// The exponent is only considered when the mantissa contains at least one
/// digit, so `.e5` (as in `obj.e5`) is lexed as a period and the identifier is
/// left for the next call. Exponent digits that run up to the end of the input
/// are included in the token.
///
/// If the consumed text starts with `.` and contains no digit at all, the
/// stream is rewound and a single `TokenKind::Period` token is returned.
///
/// NOTE(review): a literal with an exponent but no `.` (e.g. `1e5`) keeps
/// `TokenKind::Integer` — confirm the parser expects that.
fn lex_integer_or_float<'a>(stream: &mut Stream<'a>, file_id: FileId) -> Token<'a> {
    /// Consumes bytes up to the first one rejected by `invalid_char` (or to the
    /// end of input) and reports whether anything was consumed.
    fn take_digits(stream: &mut Stream<'_>, invalid_char: fn(u8) -> bool) -> bool {
        let digits = match stream.as_bstr().offset_for(invalid_char) {
            Some(offset) => stream.next_slice(offset),
            None => stream.finish(),
        };
        !digits.is_empty()
    }

    let start = stream.current_token_start();
    let start_checkpoint = stream.checkpoint();

    let is_hex = stream.as_bstr().starts_with(b"0x");
    if is_hex {
        // Step over the `0x` prefix.
        stream.next_slice(2);
    }
    let invalid_char: fn(u8) -> bool = if is_hex {
        |b| !b.is_hex_digit()
    } else {
        |b| !b.is_ascii_digit()
    };

    // Integer part (possibly empty, e.g. for `.5`).
    let has_integer_digits = take_digits(stream, invalid_char);

    // Optional fractional part; hex literals never have one.
    let (kind, has_fraction_digits) = if !is_hex && stream.as_bstr().first() == Some(&b'.') {
        stream.next_slice(1);
        (TokenKind::Float, take_digits(stream, invalid_char))
    } else {
        (TokenKind::Integer, false)
    };

    // Optional exponent. Requiring mantissa digits keeps `.e5` from being
    // swallowed as a number.
    let has_mantissa_digits = has_integer_digits || has_fraction_digits;
    if has_mantissa_digits && matches!(stream.as_bstr().first(), Some(b'e' | b'E')) {
        stream.next_slice(1);
        if matches!(stream.as_bstr().first(), Some(b'+' | b'-')) {
            stream.next_slice(1);
        }
        // Also consumes digits that extend to the end of the input.
        take_digits(stream, invalid_char);
    }

    // Rewind and take the whole literal as one contiguous slice.
    let end = stream.previous_token_end();
    stream.reset(&start_checkpoint);
    let raw = stream.next_slice(end - start);

    // A `.` with no digits anywhere is not a number: re-lex it as a period.
    if raw.starts_with('.') && !raw.contains(|c: char| c.is_ascii_digit()) {
        stream.reset(&start_checkpoint);
        return lex_ascii_char(stream, TokenKind::Period, file_id);
    }

    let span = Span::new_unchecked(start, end, file_id);
    Token::new(kind, span, raw)
}
/// Lexes a line ending that starts with `\r`.
///
/// `\r\n` becomes one `Newline` token covering both bytes; a `\r` that is not
/// followed by `\n` becomes a one-byte `Newline` token.
fn lex_crlf<'a>(stream: &mut Stream<'a>, file_id: FileId) -> Token<'a> {
    let start = stream.current_token_start();
    let width = match stream.as_bstr().get(1) {
        Some(&b'\n') => '\r'.len_utf8() + '\n'.len_utf8(),
        _ => '\r'.len_utf8(),
    };
    let raw = stream.next_slice(width);
    let end = stream.previous_token_end();
    Token::new(
        TokenKind::Newline,
        Span::new_unchecked(start, end, file_id),
        raw,
    )
}
/// Lexes an inline `@PCode { ... }` block into a single `TokenKind::PCode` token.
///
/// The token text is everything between the opening `@PCode {` and its
/// matching `}`; nested `{ ... }` pairs inside the body are balanced. The span
/// starts right after the opening `@PCode {` and ends after the closing brace.
///
/// If the block is never closed, the body extends to the end of the input and
/// is returned in full (nothing is trimmed, and the stream ends at EOF).
///
/// # Panics
///
/// Panics if the input at the current position does not start with the exact
/// text `@PCode {`.
fn lex_pcode<'a>(stream: &mut Stream<'a>, file_id: FileId) -> Token<'a> {
    const PREFIX: &str = "@PCode {";

    if !stream.as_bstr().starts_with(PREFIX.as_bytes()) {
        panic!("Invalid @PCode token! Syntax must be \"@PCode {{\" pcode here \"}}\"");
    }
    stream.next_slice(PREFIX.len());

    let start_checkpoint = stream.checkpoint();
    let start = stream.current_token_start();

    // Number of currently open nested `{` inside the body.
    let mut depth = 0usize;
    // Whether the brace matching the opening `@PCode {` was found.
    let mut closed = false;
    loop {
        let Some(span) = stream.as_bstr().find_slice(('{', '}')) else {
            // Unterminated block: take everything that is left.
            stream.finish();
            break;
        };
        let found = stream.as_bstr()[span.start];
        stream.next_slice(span.end);
        if found == b'{' {
            depth += 1;
        } else if depth == 0 {
            closed = true;
            break;
        } else {
            depth -= 1;
        }
    }

    let end = stream.previous_token_end();
    stream.reset(&start_checkpoint);
    // Only exclude (and afterwards skip) the closing brace when there is one;
    // otherwise the subtraction could underflow or cut off real content.
    let body_len = end - start - usize::from(closed);
    let raw = stream.next_slice(body_len);
    if closed {
        stream.next_slice(1);
    }

    let span = Span::new_unchecked(start, end, file_id);
    Token::new(TokenKind::PCode, span, raw)
}
/// The backslash byte; `lex_string` treats it as the escape introducer inside
/// string literals.
pub(crate) const ESCAPE: u8 = b'\\';
/// Lexes a string literal delimited by the quote character selected by `kind`.
///
/// The token text is the content between the quotes, taken verbatim from the
/// source (escape sequences are not decoded). The span covers the opening
/// quote through the closing quote when there is one.
///
/// A backslash followed by another backslash or by the active quote character
/// skips that following byte, so it cannot terminate the literal.
///
/// The literal is unterminated when a `\n` or the end of input is reached
/// before the closing quote. In that case the token text is everything after
/// the opening quote up to (not including) the `\n` / end of input, and the
/// stream is left positioned there.
fn lex_string<'a>(stream: &mut Stream<'a>, kind: QuoteKind, file_id: FileId) -> Token<'a> {
    let start = stream.current_token_start();
    // Step over the opening quote.
    stream.next_slice(1);
    let start_checkpoint = stream.checkpoint();

    let quotation_mark = match kind {
        QuoteKind::Double => b'"',
        QuoteKind::Single => b'\'',
    };

    // Whether the closing quote was found (and consumed).
    let mut terminated = false;
    loop {
        let Some(span) = stream.as_bstr().find_slice((quotation_mark, ESCAPE, b'\n')) else {
            // End of input before the closing quote.
            stream.finish();
            break;
        };
        let found = stream.as_bstr()[span.start];
        if found == quotation_mark {
            stream.next_slice(span.end);
            terminated = true;
            break;
        } else if found == ESCAPE {
            stream.next_slice(span.end);
            let peek = stream.as_bstr().peek_token();
            if peek == Some(ESCAPE) || peek == Some(quotation_mark) {
                // Escaped backslash or quote: skip it so it is not re-examined.
                stream.next_slice(1);
            }
        } else if found == b'\n' {
            // Stop in front of the newline so it is lexed as its own token.
            stream.next_slice(span.start);
            break;
        } else {
            unreachable!("found `{found}`");
        }
    }

    let end = stream.previous_token_end();
    stream.reset(&start_checkpoint);
    // `end - start` always includes the opening quote, and includes the closing
    // quote only when the literal was terminated. Subtracting a fixed 2 would
    // underflow or drop the last content byte for unterminated literals.
    let content_len = end - start - 1 - usize::from(terminated);
    let raw = stream.next_slice(content_len);
    if terminated {
        // Step over the closing quote.
        stream.next_slice(1);
    }

    let span = Span::new_unchecked(start, end, file_id);
    Token::new(TokenKind::String(kind), span, raw)
}
fn lex_identifier_or_keyword<'a>(stream: &mut Stream<'a>, file_id: FileId) -> Token<'a> {
let start = stream.current_token_start();
let offset = stream
.as_bstr()
.offset_for(|b| !b.is_ascii_alphanumeric() && b != b'_' && b != b'$')
.unwrap_or(stream.eof_offset());
let raw = stream.next_slice(offset);
let end = stream.previous_token_end();
let span = Span::new_unchecked(start, end, file_id);
let kind = match raw {
"var" => TokenKind::Keyword(Keyword::Var),
"instanceof" => TokenKind::Keyword(Keyword::InstanceOf),
"new" => TokenKind::Keyword(Keyword::New),
"typeof" => TokenKind::Keyword(Keyword::TypeOf),
"delete" => TokenKind::Keyword(Keyword::Delete),
"in" => TokenKind::Keyword(Keyword::In),
"void" => TokenKind::Keyword(Keyword::Void),
"function" => TokenKind::Keyword(Keyword::Function),
"return" => TokenKind::Keyword(Keyword::Return),
"for" => TokenKind::Keyword(Keyword::For),
"if" => TokenKind::Keyword(Keyword::If),
"else" => TokenKind::Keyword(Keyword::Else),
"break" => TokenKind::Keyword(Keyword::Break),
"continue" => TokenKind::Keyword(Keyword::Continue),
"throw" => TokenKind::Keyword(Keyword::Throw),
"try" => TokenKind::Keyword(Keyword::Try),
"catch" => TokenKind::Keyword(Keyword::Catch),
"finally" => TokenKind::Keyword(Keyword::Finally),
"ifFrameLoaded" => TokenKind::Keyword(Keyword::IfFrameLoaded),
"tellTarget" => TokenKind::Keyword(Keyword::TellTarget),
"eq" => TokenKind::Keyword(Keyword::Eq),
"gt" => TokenKind::Keyword(Keyword::Gt),
"ge" => TokenKind::Keyword(Keyword::Ge),
"lt" => TokenKind::Keyword(Keyword::Lt),
"le" => TokenKind::Keyword(Keyword::Le),
"ne" => TokenKind::Keyword(Keyword::Ne),
"and" => TokenKind::Keyword(Keyword::And),
"or" => TokenKind::Keyword(Keyword::Or),
"not" => TokenKind::Keyword(Keyword::Not),
"while" => TokenKind::Keyword(Keyword::While),
"dynamic" => TokenKind::Keyword(Keyword::Dynamic),
"extends" => TokenKind::Keyword(Keyword::Extends),
"get" => TokenKind::Keyword(Keyword::Get),
"implements" => TokenKind::Keyword(Keyword::Implements),
"interface" => TokenKind::Keyword(Keyword::Interface),
"private" => TokenKind::Keyword(Keyword::Private),
"public" => TokenKind::Keyword(Keyword::Public),
"set" => TokenKind::Keyword(Keyword::Set),
"static" => TokenKind::Keyword(Keyword::Static),
"case" => TokenKind::Keyword(Keyword::Case),
"switch" => TokenKind::Keyword(Keyword::Switch),
"default" => TokenKind::Keyword(Keyword::Default),
"class" => TokenKind::Keyword(Keyword::Class),
"with" => TokenKind::Keyword(Keyword::With),
"import" => TokenKind::Keyword(Keyword::Import),
_ => TokenKind::Identifier,
};
Token::new(kind, span, raw)
}