use perl_error::{ParseError, ParseResult};
use perl_lexer::{LexerMode, PerlLexer, Token as LexerToken, TokenType as LexerTokenType};
pub use perl_token::{Token, TokenKind};
use std::collections::VecDeque;
/// Backing source for a `TokenStream`: either a live lexer over source
/// text, or a pre-tokenized, already-converted buffer.
enum TokenStreamInner<'a> {
// Streams tokens on demand from the Perl lexer.
Lexer(PerlLexer<'a>),
// Replays a fixed token list (see `TokenStream::from_vec`).
Buffered(VecDeque<Token>),
}
/// A parser-facing token stream with up to three tokens of lookahead.
pub struct TokenStream<'a> {
// Where raw tokens come from (lexer or buffered vector).
inner: TokenStreamInner<'a>,
// Lookahead slots, filled in order by `peek`/`peek_second`/`peek_third`.
// Invariant: a later slot is only occupied when every earlier one is.
peeked: Option<Token>,
peeked_second: Option<Token>,
peeked_third: Option<Token>,
}
impl<'a> TokenStream<'a> {
pub fn new(input: &'a str) -> Self {
TokenStream {
inner: TokenStreamInner::Lexer(PerlLexer::new(input)),
peeked: None,
peeked_second: None,
peeked_third: None,
}
}
pub fn from_vec(tokens: Vec<Token>) -> Self {
TokenStream {
inner: TokenStreamInner::Buffered(VecDeque::from(tokens)),
peeked: None,
peeked_second: None,
peeked_third: None,
}
}
/// Converts raw lexer tokens into parser tokens, dropping trivia.
///
/// Whitespace, newlines, comments, and the lexer's EOF marker are filtered
/// out; everything else is mapped through `Self::convert_lexer_token`.
/// Callers that need an explicit EOF must synthesize it themselves (as
/// `next_token_from_buf` does for an exhausted buffer).
pub fn lexer_tokens_to_parser_tokens(tokens: Vec<LexerToken>) -> Vec<Token> {
    tokens
        .into_iter()
        // One or-pattern covers every trivia kind, including comments;
        // the original needlessly split `Comment(_)` into a second check.
        .filter(|t| {
            !matches!(
                t.token_type,
                LexerTokenType::Whitespace
                    | LexerTokenType::Newline
                    | LexerTokenType::EOF
                    | LexerTokenType::Comment(_)
            )
        })
        .map(Self::convert_lexer_token)
        .collect()
}
/// Returns the next significant token without consuming it.
///
/// The token is fetched once and cached; repeated calls are free until
/// `next` consumes it.
pub fn peek(&mut self) -> ParseResult<&Token> {
    if self.peeked.is_none() {
        let token = self.next_token()?;
        self.peeked = Some(token);
    }
    match self.peeked {
        // The slot was just filled above, so this arm always matches.
        Some(ref token) => Ok(token),
        None => Err(ParseError::UnexpectedEof),
    }
}
/// Consumes and returns the next token.
///
/// EOF is "sticky": once an EOF token is produced it is re-cached, so
/// every subsequent call keeps yielding EOF instead of erroring.
#[allow(clippy::should_implement_trait)]
pub fn next(&mut self) -> ParseResult<Token> {
    match self.peeked.take() {
        Some(token) => {
            if token.kind == TokenKind::Eof {
                // Keep EOF buffered so the stream never runs past the end.
                self.peeked = Some(token.clone());
            } else {
                // Shift the lookahead pipeline forward one slot.
                self.peeked = self.peeked_second.take();
                self.peeked_second = self.peeked_third.take();
            }
            Ok(token)
        }
        None => {
            let token = self.next_token()?;
            if token.kind == TokenKind::Eof {
                self.peeked = Some(token.clone());
            }
            Ok(token)
        }
    }
}
/// Reports whether the stream is positioned at EOF (errors count as "no").
pub fn is_eof(&mut self) -> bool {
    self.peek().map_or(false, |token| token.kind == TokenKind::Eof)
}
/// Returns the token after the next one (two-token lookahead).
pub fn peek_second(&mut self) -> ParseResult<&Token> {
    // Fill the first slot before the second to keep the slots ordered.
    self.peek()?;
    if self.peeked_second.is_none() {
        let token = self.next_token()?;
        self.peeked_second = Some(token);
    }
    match self.peeked_second {
        Some(ref token) => Ok(token),
        None => Err(ParseError::UnexpectedEof),
    }
}
/// Returns the token two past the next one (three-token lookahead).
pub fn peek_third(&mut self) -> ParseResult<&Token> {
    // Fill the first two slots before the third.
    self.peek_second()?;
    if self.peeked_third.is_none() {
        let token = self.next_token()?;
        self.peeked_third = Some(token);
    }
    match self.peeked_third {
        Some(ref token) => Ok(token),
        None => Err(ParseError::UnexpectedEof),
    }
}
/// Switches the underlying lexer into format mode; no-op for buffered streams.
pub fn enter_format_mode(&mut self) {
    match self.inner {
        TokenStreamInner::Lexer(ref mut lexer) => lexer.enter_format_mode(),
        TokenStreamInner::Buffered(_) => {}
    }
}
/// Resets lookahead at a statement boundary and puts the lexer back into
/// term-expecting mode; buffered streams only drop their lookahead.
pub fn on_stmt_boundary(&mut self) {
    // Discard cached tokens so the next one is lexed under the new mode.
    self.peeked = None;
    self.peeked_second = None;
    self.peeked_third = None;
    match self.inner {
        TokenStreamInner::Lexer(ref mut lexer) => lexer.set_mode(LexerMode::ExpectTerm),
        TokenStreamInner::Buffered(_) => {}
    }
}
/// Rewinds the lexer to the start of the currently peeked token so it can
/// be re-lexed in term position, then drops all cached lookahead.
pub fn relex_as_term(&mut self) {
    // Capture the rewind position before clearing the lookahead slots.
    let rewind_to = self.peeked.as_ref().map(|token| token.start);
    if let Some(pos) = rewind_to {
        if let TokenStreamInner::Lexer(ref mut lexer) = self.inner {
            use perl_lexer::Checkpointable;
            let cp = perl_lexer::LexerCheckpoint::at_position(pos);
            lexer.restore(&cp);
        }
    }
    self.peeked = None;
    self.peeked_second = None;
    self.peeked_third = None;
}
/// Discards every cached lookahead token.
///
/// NOTE(review): tokens already pulled from the lexer are dropped for good
/// here — presumably callers re-lex afterwards (e.g. `relex_as_term` /
/// `peek_fresh_kind`); confirm against call sites.
pub fn invalidate_peek(&mut self) {
    // Cleared in slot order for consistency with the other methods.
    self.peeked = None;
    self.peeked_second = None;
    self.peeked_third = None;
}
/// Drops all lookahead, peeks the next token fresh, and returns its kind
/// (`None` when peeking fails).
pub fn peek_fresh_kind(&mut self) -> Option<TokenKind> {
    self.invalidate_peek();
    self.peek().ok().map(|token| token.kind)
}
/// Pulls the next raw token from whichever source this stream wraps.
fn next_token(&mut self) -> ParseResult<Token> {
    match self.inner {
        TokenStreamInner::Lexer(ref mut lexer) => Self::next_token_from_lexer(lexer),
        TokenStreamInner::Buffered(ref mut buf) => Self::next_token_from_buf(buf),
    }
}
/// Reads from the lexer until a significant token (or EOF) appears.
///
/// Whitespace, newlines, and comments are skipped; lexer exhaustion
/// (`None`) is reported as `UnexpectedEof`; the lexer's EOF marker is
/// converted into a parser-level EOF token with an empty text.
fn next_token_from_lexer(lexer: &mut PerlLexer<'_>) -> ParseResult<Token> {
    loop {
        let raw = lexer.next_token().ok_or(ParseError::UnexpectedEof)?;
        // Trivia is invisible to the parser.
        if matches!(
            raw.token_type,
            LexerTokenType::Whitespace | LexerTokenType::Newline | LexerTokenType::Comment(_)
        ) {
            continue;
        }
        if matches!(raw.token_type, LexerTokenType::EOF) {
            return Ok(Token {
                kind: TokenKind::Eof,
                text: String::new().into(),
                start: raw.start,
                end: raw.end,
            });
        }
        return Ok(Self::convert_lexer_token(raw));
    }
}
/// Pops the next buffered token; an exhausted buffer yields a synthetic
/// EOF token with a zero span.
fn next_token_from_buf(buf: &mut VecDeque<Token>) -> ParseResult<Token> {
    let synthetic_eof = || Token { kind: TokenKind::Eof, text: "".into(), start: 0, end: 0 };
    Ok(buf.pop_front().unwrap_or_else(synthetic_eof))
}
/// Maps a lexer token onto the parser's token vocabulary.
///
/// Keywords and operators are resolved by spelling, structural tokens map
/// one-to-one, and anything unrecognized becomes `TokenKind::Unknown`.
/// The token's text and span are carried over unchanged.
///
/// Fixes: the arms previously collapsed onto single lines (`"qw"`/`_` and
/// `"*"`/`"$"`) are split out per rustfmt; no mapping was changed.
fn convert_lexer_token(token: LexerToken) -> Token {
    let kind = match &token.token_type {
        LexerTokenType::Keyword(kw) => match kw.as_ref() {
            // Declarations and scoping.
            "my" => TokenKind::My,
            "our" => TokenKind::Our,
            "local" => TokenKind::Local,
            "state" => TokenKind::State,
            "sub" => TokenKind::Sub,
            // Control flow.
            "if" => TokenKind::If,
            "elsif" => TokenKind::Elsif,
            "else" => TokenKind::Else,
            "unless" => TokenKind::Unless,
            "while" => TokenKind::While,
            "until" => TokenKind::Until,
            "for" => TokenKind::For,
            "foreach" => TokenKind::Foreach,
            "return" => TokenKind::Return,
            // Packages and pragmas.
            "package" => TokenKind::Package,
            "use" => TokenKind::Use,
            "no" => TokenKind::No,
            // Special compile/run-phase blocks.
            "BEGIN" => TokenKind::Begin,
            "END" => TokenKind::End,
            "CHECK" => TokenKind::Check,
            "INIT" => TokenKind::Init,
            "UNITCHECK" => TokenKind::Unitcheck,
            "eval" => TokenKind::Eval,
            "do" => TokenKind::Do,
            // given/when and try/catch.
            "given" => TokenKind::Given,
            "when" => TokenKind::When,
            "default" => TokenKind::Default,
            "try" => TokenKind::Try,
            "catch" => TokenKind::Catch,
            "field" => TokenKind::Field,
            "finally" => TokenKind::Finally,
            // Loop control.
            "continue" => TokenKind::Continue,
            "next" => TokenKind::Next,
            "last" => TokenKind::Last,
            "redo" => TokenKind::Redo,
            "goto" => TokenKind::Goto,
            // Object system and misc.
            "class" => TokenKind::Class,
            "method" => TokenKind::Method,
            "format" => TokenKind::Format,
            "undef" => TokenKind::Undef,
            // Low-precedence word operators.
            "and" => TokenKind::WordAnd,
            "or" => TokenKind::WordOr,
            "not" => TokenKind::WordNot,
            "xor" => TokenKind::WordXor,
            "cmp" => TokenKind::StringCompare,
            // `qw` deliberately falls through to Identifier, same as any
            // keyword the parser treats as a plain word; kept explicit as
            // documentation of that choice.
            "qw" => TokenKind::Identifier,
            _ => TokenKind::Identifier,
        },
        LexerTokenType::Operator(op) => match op.as_ref() {
            // Assignment and arithmetic.
            "=" => TokenKind::Assign,
            "+" => TokenKind::Plus,
            "-" => TokenKind::Minus,
            "*" => TokenKind::Star,
            "/" => TokenKind::Slash,
            "%" => TokenKind::Percent,
            "**" => TokenKind::Power,
            // Bitwise.
            "<<" => TokenKind::LeftShift,
            ">>" => TokenKind::RightShift,
            "&" => TokenKind::BitwiseAnd,
            "|" => TokenKind::BitwiseOr,
            "^" => TokenKind::BitwiseXor,
            "~" => TokenKind::BitwiseNot,
            // Compound assignment.
            "+=" => TokenKind::PlusAssign,
            "-=" => TokenKind::MinusAssign,
            "*=" => TokenKind::StarAssign,
            "/=" => TokenKind::SlashAssign,
            "%=" => TokenKind::PercentAssign,
            ".=" => TokenKind::DotAssign,
            "&=" => TokenKind::AndAssign,
            "|=" => TokenKind::OrAssign,
            "^=" => TokenKind::XorAssign,
            "**=" => TokenKind::PowerAssign,
            "<<=" => TokenKind::LeftShiftAssign,
            ">>=" => TokenKind::RightShiftAssign,
            "&&=" => TokenKind::LogicalAndAssign,
            "||=" => TokenKind::LogicalOrAssign,
            "//=" => TokenKind::DefinedOrAssign,
            // Comparison and matching.
            "==" => TokenKind::Equal,
            "!=" => TokenKind::NotEqual,
            "=~" => TokenKind::Match,
            "!~" => TokenKind::NotMatch,
            "~~" => TokenKind::SmartMatch,
            "<" => TokenKind::Less,
            ">" => TokenKind::Greater,
            "<=" => TokenKind::LessEqual,
            ">=" => TokenKind::GreaterEqual,
            "<=>" => TokenKind::Spaceship,
            // Logical.
            "&&" => TokenKind::And,
            "||" => TokenKind::Or,
            "!" => TokenKind::Not,
            "//" => TokenKind::DefinedOr,
            // Structure and punctuation.
            "->" => TokenKind::Arrow,
            "=>" => TokenKind::FatArrow,
            "." => TokenKind::Dot,
            ".." => TokenKind::Range,
            "..." => TokenKind::Ellipsis,
            "++" => TokenKind::Increment,
            "--" => TokenKind::Decrement,
            "::" => TokenKind::DoubleColon,
            "?" => TokenKind::Question,
            ":" => TokenKind::Colon,
            "\\" => TokenKind::Backslash,
            // Sigils the lexer classified as operators.
            "$" => TokenKind::ScalarSigil,
            "@" => TokenKind::ArraySigil,
            _ => TokenKind::Unknown,
        },
        // One-to-one structural mappings.
        LexerTokenType::Arrow => TokenKind::Arrow,
        LexerTokenType::FatComma => TokenKind::FatArrow,
        LexerTokenType::LeftParen => TokenKind::LeftParen,
        LexerTokenType::RightParen => TokenKind::RightParen,
        LexerTokenType::LeftBrace => TokenKind::LeftBrace,
        LexerTokenType::RightBrace => TokenKind::RightBrace,
        LexerTokenType::LeftBracket => TokenKind::LeftBracket,
        LexerTokenType::RightBracket => TokenKind::RightBracket,
        LexerTokenType::Semicolon => TokenKind::Semicolon,
        LexerTokenType::Comma => TokenKind::Comma,
        LexerTokenType::Division => TokenKind::Slash,
        // Literals and quote-like constructs.
        LexerTokenType::Number(_) => TokenKind::Number,
        LexerTokenType::StringLiteral | LexerTokenType::InterpolatedString(_) => {
            TokenKind::String
        }
        LexerTokenType::RegexMatch | LexerTokenType::QuoteRegex => TokenKind::Regex,
        LexerTokenType::Substitution => TokenKind::Substitution,
        LexerTokenType::Transliteration => TokenKind::Transliteration,
        LexerTokenType::QuoteSingle => TokenKind::QuoteSingle,
        LexerTokenType::QuoteDouble => TokenKind::QuoteDouble,
        LexerTokenType::QuoteWords => TokenKind::QuoteWords,
        LexerTokenType::QuoteCommand => TokenKind::QuoteCommand,
        LexerTokenType::HeredocStart => TokenKind::HeredocStart,
        LexerTokenType::HeredocBody(_) => TokenKind::HeredocBody,
        LexerTokenType::FormatBody(_) => TokenKind::FormatBody,
        LexerTokenType::Version(_) => TokenKind::VString,
        LexerTokenType::DataMarker(_) => TokenKind::DataMarker,
        LexerTokenType::DataBody(_) => TokenKind::DataBody,
        LexerTokenType::UnknownRest => TokenKind::UnknownRest,
        LexerTokenType::Identifier(text) => {
            match text.as_ref() {
                "no" => TokenKind::No,
                // Sigils / star that the lexer surfaced as identifiers.
                "*" => TokenKind::Star,
                "$" => TokenKind::ScalarSigil,
                "@" => TokenKind::ArraySigil,
                "%" => TokenKind::HashSigil,
                "&" => TokenKind::SubSigil,
                _ => TokenKind::Identifier,
            }
        }
        LexerTokenType::Error(msg) => {
            if msg.as_ref() == "Heredoc nesting too deep" {
                // Surface the lexer's depth limit as its own kind so the
                // parser can report it distinctly.
                TokenKind::HeredocDepthLimit
            } else {
                // Salvage brace tokens from error spans; anything else is
                // passed through as Unknown.
                match token.text.as_ref() {
                    "{" => TokenKind::LeftBrace,
                    "}" => TokenKind::RightBrace,
                    _ => TokenKind::Unknown,
                }
            }
        }
        _ => TokenKind::Unknown,
    };
    Token { kind, text: token.text, start: token.start, end: token.end }
}
}