use crate::syntax::error::{ParseError, ParseResult};
use perl_lexer::{LexerMode, PerlLexer, Token as LexerToken, TokenType as LexerTokenType};
pub use perl_token::{Token, TokenKind};
use std::collections::VecDeque;
/// Backing source of tokens: either a live lexer or a pre-lexed buffer.
enum TokenStreamInner<'a> {
    /// Tokens are produced on demand by the Perl lexer.
    Lexer(PerlLexer<'a>),
    /// Tokens were lexed up front and are drained from a queue.
    Buffered(VecDeque<Token>),
}
/// Parser-facing token stream with up to three tokens of lookahead.
pub struct TokenStream<'a> {
    /// Where tokens come from (live lexer or pre-lexed buffer).
    inner: TokenStreamInner<'a>,
    /// Byte offset at which a synthetic EOF token is emitted once a buffered
    /// stream runs dry (initialized to the input length for the lexer case).
    buffered_eof_pos: usize,
    /// One-token lookahead slot; also pins EOF so it repeats (see `next`).
    peeked: Option<Token>,
    /// Two-token lookahead slot; only filled after `peeked` is filled.
    peeked_second: Option<Token>,
    /// Three-token lookahead slot; only filled after `peeked_second` is filled.
    peeked_third: Option<Token>,
}
impl<'a> TokenStream<'a> {
    /// Creates a stream that lexes `input` lazily.
    pub fn new(input: &'a str) -> Self {
        TokenStream {
            inner: TokenStreamInner::Lexer(PerlLexer::new(input)),
            // A synthetic EOF, if ever needed, sits at the end of the text.
            buffered_eof_pos: input.len(),
            peeked: None,
            peeked_second: None,
            peeked_third: None,
        }
    }

    /// Creates a stream over tokens that were lexed up front.
    ///
    /// The EOF position is derived from the last token: its `start` when the
    /// vector already ends in an EOF token, otherwise its `end`; `0` when the
    /// vector is empty.
    pub fn from_vec(tokens: Vec<Token>) -> Self {
        let buffered_eof_pos = tokens
            .last()
            .map(|token| if token.kind == TokenKind::Eof { token.start } else { token.end })
            .unwrap_or(0);
        TokenStream {
            inner: TokenStreamInner::Buffered(VecDeque::from(tokens)),
            buffered_eof_pos,
            peeked: None,
            peeked_second: None,
            peeked_third: None,
        }
    }

    /// Converts raw lexer tokens into parser tokens, dropping trivia
    /// (whitespace, newlines, comments) and the lexer's EOF marker.
    pub fn lexer_tokens_to_parser_tokens(tokens: Vec<LexerToken>) -> Vec<Token> {
        tokens
            .into_iter()
            // One merged or-pattern instead of two `matches!` calls
            // joined with `&&` — same filter, single pass over the variants.
            .filter(|t| {
                !matches!(
                    t.token_type,
                    LexerTokenType::Whitespace
                        | LexerTokenType::Newline
                        | LexerTokenType::EOF
                        | LexerTokenType::Comment(_)
                )
            })
            .map(Self::convert_lexer_token)
            .collect()
    }

    /// Returns a reference to the next token without consuming it.
    pub fn peek(&mut self) -> ParseResult<&Token> {
        if self.peeked.is_none() {
            self.peeked = Some(self.next_token()?);
        }
        // `peeked` is guaranteed `Some` at this point; the `ok_or` only
        // converts the `Option` view for the borrow checker.
        self.peeked.as_ref().ok_or(ParseError::UnexpectedEof)
    }

    /// Consumes and returns the next token.
    ///
    /// EOF is sticky: once an EOF token is produced it is kept in the
    /// one-token lookahead slot, so further calls keep returning EOF instead
    /// of reading past the end of input.
    #[allow(clippy::should_implement_trait)]
    pub fn next(&mut self) -> ParseResult<Token> {
        if let Some(token) = self.peeked.take() {
            if token.kind == TokenKind::Eof {
                // Pin EOF so repeated `next`/`peek` calls keep seeing it.
                self.peeked = Some(token.clone());
            } else {
                // Shift the lookahead pipeline forward by one slot.
                self.peeked = self.peeked_second.take();
                self.peeked_second = self.peeked_third.take();
            }
            Ok(token)
        } else {
            let token = self.next_token()?;
            if token.kind == TokenKind::Eof {
                self.peeked = Some(token.clone());
            }
            Ok(token)
        }
    }

    /// True when the next token is EOF; a peek error counts as not-at-EOF.
    pub fn is_eof(&mut self) -> bool {
        matches!(self.peek(), Ok(token) if token.kind == TokenKind::Eof)
    }

    /// Returns the second token of lookahead without consuming anything.
    pub fn peek_second(&mut self) -> ParseResult<&Token> {
        // Fill slots in order so `peeked_second` is never set while
        // `peeked` is empty.
        self.peek()?;
        if self.peeked_second.is_none() {
            self.peeked_second = Some(self.next_token()?);
        }
        self.peeked_second.as_ref().ok_or(ParseError::UnexpectedEof)
    }

    /// Returns the third token of lookahead without consuming anything.
    pub fn peek_third(&mut self) -> ParseResult<&Token> {
        self.peek_second()?;
        if self.peeked_third.is_none() {
            self.peeked_third = Some(self.next_token()?);
        }
        self.peeked_third.as_ref().ok_or(ParseError::UnexpectedEof)
    }

    /// Switches the underlying lexer into format-body mode.
    /// No-op for buffered streams.
    pub fn enter_format_mode(&mut self) {
        if let TokenStreamInner::Lexer(ref mut lexer) = self.inner {
            lexer.enter_format_mode();
        }
    }

    /// Called at a statement boundary: drops all lookahead and tells the
    /// lexer to expect a term next.
    pub fn on_stmt_boundary(&mut self) {
        self.clear_lookahead();
        if let TokenStreamInner::Lexer(ref mut lexer) = self.inner {
            lexer.set_mode(LexerMode::ExpectTerm);
        }
    }

    /// Rewinds the lexer to the start of the currently peeked token and drops
    /// all lookahead, so the token gets re-lexed (in term position).
    ///
    /// NOTE(review): this relies on `LexerCheckpoint::at_position` restoring
    /// only the byte position; any other lexer state is assumed unaffected —
    /// confirm against `perl_lexer`.
    pub fn relex_as_term(&mut self) {
        if let TokenStreamInner::Lexer(ref mut lexer) = self.inner {
            if let Some(ref token) = self.peeked {
                use perl_lexer::Checkpointable;
                let cp = perl_lexer::LexerCheckpoint::at_position(token.start);
                lexer.restore(&cp);
            }
        }
        self.clear_lookahead();
    }

    /// Discards all lookahead without touching the lexer position.
    ///
    /// NOTE(review): clearing lookahead does not rewind the lexer, so tokens
    /// already pulled into the slots are dropped — callers are expected to
    /// have repositioned or re-moded the lexer (cf. `relex_as_term`).
    pub fn invalidate_peek(&mut self) {
        self.clear_lookahead();
    }

    /// Empties the three lookahead slots. Previously this sequence was
    /// duplicated in three methods (with inconsistent ordering in one).
    fn clear_lookahead(&mut self) {
        self.peeked = None;
        self.peeked_second = None;
        self.peeked_third = None;
    }

    /// Drops stale lookahead and reports the kind of the freshly lexed next
    /// token, or `None` when lexing fails.
    pub fn peek_fresh_kind(&mut self) -> Option<TokenKind> {
        self.invalidate_peek();
        // Combinator form of the previous manual `match` on the Result.
        self.peek().ok().map(|token| token.kind)
    }

    /// Pulls the next significant token from whichever backing source.
    fn next_token(&mut self) -> ParseResult<Token> {
        match &mut self.inner {
            TokenStreamInner::Lexer(lexer) => Self::next_token_from_lexer(lexer),
            TokenStreamInner::Buffered(buf) => {
                Self::next_token_from_buf(buf, &mut self.buffered_eof_pos)
            }
        }
    }

    /// Skips trivia and returns the next significant token; the lexer's EOF
    /// is translated into a parser `TokenKind::Eof` token at the same span.
    fn next_token_from_lexer(lexer: &mut PerlLexer<'_>) -> ParseResult<Token> {
        loop {
            // A lexer that yields `None` (rather than an EOF token) is
            // treated as an unexpected end of input.
            let lexer_token = lexer.next_token().ok_or(ParseError::UnexpectedEof)?;
            match &lexer_token.token_type {
                LexerTokenType::Whitespace | LexerTokenType::Newline => continue,
                LexerTokenType::Comment(_) => continue,
                LexerTokenType::EOF => {
                    return Ok(Token {
                        kind: TokenKind::Eof,
                        text: String::new().into(),
                        start: lexer_token.start,
                        end: lexer_token.end,
                    });
                }
                _ => {
                    return Ok(Self::convert_lexer_token(lexer_token));
                }
            }
        }
    }

    /// Pops from the buffer, recording where EOF should be reported once the
    /// buffer runs dry.
    fn next_token_from_buf(
        buf: &mut VecDeque<Token>,
        buffered_eof_pos: &mut usize,
    ) -> ParseResult<Token> {
        match buf.pop_front() {
            Some(token) => {
                *buffered_eof_pos =
                    if token.kind == TokenKind::Eof { token.start } else { token.end };
                Ok(token)
            }
            // Exhausted: synthesize EOF at the recorded position, forever.
            None => Ok(Token::eof_at(*buffered_eof_pos)),
        }
    }

    /// Maps a lexer token to a parser token, classifying its kind.
    fn convert_lexer_token(token: LexerToken) -> Token {
        let kind = match &token.token_type {
            LexerTokenType::Keyword(kw) => match kw.as_ref() {
                // `qw` is deliberately treated as a plain identifier; any
                // keyword the parser does not know also falls back to that.
                "qw" => TokenKind::Identifier,
                keyword => TokenKind::from_keyword(keyword).unwrap_or(TokenKind::Identifier),
            },
            // Operator text is tried as an operator first, then a sigil.
            LexerTokenType::Operator(op) => TokenKind::from_operator(op)
                .or_else(|| TokenKind::from_sigil(op))
                .unwrap_or(TokenKind::Unknown),
            LexerTokenType::Arrow => TokenKind::Arrow,
            LexerTokenType::FatComma => TokenKind::FatArrow,
            LexerTokenType::LeftParen => TokenKind::LeftParen,
            LexerTokenType::RightParen => TokenKind::RightParen,
            LexerTokenType::LeftBrace => TokenKind::LeftBrace,
            LexerTokenType::RightBrace => TokenKind::RightBrace,
            LexerTokenType::LeftBracket => TokenKind::LeftBracket,
            LexerTokenType::RightBracket => TokenKind::RightBracket,
            LexerTokenType::Semicolon => TokenKind::Semicolon,
            LexerTokenType::Comma => TokenKind::Comma,
            LexerTokenType::Division => TokenKind::Slash,
            LexerTokenType::Number(_) => TokenKind::Number,
            LexerTokenType::StringLiteral | LexerTokenType::InterpolatedString(_) => {
                TokenKind::String
            }
            LexerTokenType::RegexMatch | LexerTokenType::QuoteRegex => TokenKind::Regex,
            LexerTokenType::Substitution => TokenKind::Substitution,
            LexerTokenType::Transliteration => TokenKind::Transliteration,
            LexerTokenType::QuoteSingle => TokenKind::QuoteSingle,
            LexerTokenType::QuoteDouble => TokenKind::QuoteDouble,
            LexerTokenType::QuoteWords => TokenKind::QuoteWords,
            LexerTokenType::QuoteCommand => TokenKind::QuoteCommand,
            LexerTokenType::HeredocStart => TokenKind::HeredocStart,
            LexerTokenType::HeredocBody(_) => TokenKind::HeredocBody,
            LexerTokenType::FormatBody(_) => TokenKind::FormatBody,
            LexerTokenType::Version(_) => TokenKind::VString,
            LexerTokenType::DataMarker(_) => TokenKind::DataMarker,
            LexerTokenType::DataBody(_) => TokenKind::DataBody,
            LexerTokenType::UnknownRest => TokenKind::UnknownRest,
            LexerTokenType::Identifier(text) => {
                match text.as_ref() {
                    // Bare `%` / `&` arrive as identifiers from the lexer but
                    // are sigils for the parser.
                    "%" => TokenKind::HashSigil,
                    "&" => TokenKind::SubSigil,
                    _ => TokenKind::from_keyword(text)
                        .or_else(|| TokenKind::from_operator(text))
                        .or_else(|| TokenKind::from_sigil(text))
                        .unwrap_or(TokenKind::Identifier),
                }
            }
            LexerTokenType::Error(msg) => {
                // One specific lexer error is surfaced as its own kind; any
                // other error text is retried as a stray delimiter.
                if msg.as_ref() == "Heredoc nesting too deep" {
                    TokenKind::HeredocDepthLimit
                } else {
                    TokenKind::from_delimiter(token.text.as_ref()).unwrap_or(TokenKind::Unknown)
                }
            }
            // Foreign enum: anything unrecognized degrades to Unknown.
            _ => TokenKind::Unknown,
        };
        Token { kind, text: token.text, start: token.start, end: token.end }
    }
}