use crate::{
num::Number,
ports::{PortData, PortInfo},
syntax::lex::ParseNumberError,
value::Value,
};
pub use super::lex::LexerError;
use super::{
Span, Syntax,
lex::{Character, Lexeme, Lexer, Token},
};
use scheme_rs_macros::{maybe_async, maybe_await};
use std::{char::CharTryFromError, error::Error as StdError, fmt};
#[cfg(feature = "async")]
use futures::future::BoxFuture;
pub struct Parser<'a> {
lookahead: Vec<Token>,
lexer: Lexer<'a>,
}
macro_rules! token {
( $pattern:pat ) => {
Token {
lexeme: $pattern,
..
}
};
( $pattern:pat, $span:pat ) => {
Token {
lexeme: $pattern,
span: $span,
}
};
}
impl<'a> Parser<'a> {
pub(crate) fn new(port_data: &'a mut PortData, port_info: &'a PortInfo, span: Span) -> Self {
Parser {
lookahead: Vec::new(),
lexer: Lexer::new(port_data, port_info, span),
}
}
}
impl Parser<'_> {
#[maybe_async]
fn next_token(&mut self) -> Result<Option<Token>, LexerError> {
if let Some(next) = self.lookahead.pop() {
Ok(Some(next))
} else {
maybe_await!(self.lexer.next_token())
}
}
pub(crate) fn curr_span(&self) -> Span {
self.lexer.curr_span()
}
fn return_token(&mut self, token: Token) {
self.lookahead.push(token)
}
#[cfg(feature = "async")]
pub fn expression(&mut self) -> BoxFuture<'_, Result<Option<Syntax>, ParseSyntaxError>> {
Box::pin(self.expression_inner())
}
#[cfg(not(feature = "async"))]
pub fn expression(&mut self) -> Result<Option<Syntax>, ParseSyntaxError> {
self.expression_inner()
}
#[maybe_async]
fn expression_inner(&mut self) -> Result<Option<Syntax>, ParseSyntaxError> {
match maybe_await!(self.next_token())?.ok_or(ParseSyntaxError::UnexpectedEof)? {
token!(Lexeme::Boolean(b), span) => Ok(Some(Syntax::new_wrapped(Value::from(b), span))),
token!(Lexeme::Character(Character::Literal(c)), span) => {
Ok(Some(Syntax::new_wrapped(Value::from(c), span)))
}
token!(Lexeme::Character(Character::Escaped(e)), span) => {
Ok(Some(Syntax::new_wrapped(Value::from(char::from(e)), span)))
}
token!(Lexeme::Character(Character::Unicode(u)), span) => {
Ok(Some(Syntax::new_wrapped(
Value::from(char::try_from(u32::from_str_radix(&u, 16).unwrap())?),
span,
)))
}
token!(Lexeme::String(s), span) => Ok(Some(Syntax::new_wrapped(Value::from(s), span))),
token!(Lexeme::Number(n), span) => Ok(Some(Syntax::new_wrapped(
Value::from(Number::try_from(n)?),
span,
))),
token!(Lexeme::Identifier(ident), span) => {
Ok(Some(Syntax::new_identifier(&ident, span)))
}
token!(Lexeme::LParen, span) => {
Ok(Some(maybe_await!(self.list(span, Lexeme::RParen))?))
}
token!(Lexeme::LBracket, span) => {
Ok(Some(maybe_await!(self.list(span, Lexeme::RBracket))?))
}
token!(Lexeme::HashParen, span) => Ok(Some(maybe_await!(self.vector(span))?)),
token!(Lexeme::Vu8Paren, span) => Ok(Some(maybe_await!(self.byte_vector(span))?)),
token!(Lexeme::Quote, span) => Ok(Some(maybe_await!(self.alias("quote", span))?)),
token!(Lexeme::Backquote, span) => {
Ok(Some(maybe_await!(self.alias("quasiquote", span))?))
}
token!(Lexeme::Comma, span) => Ok(Some(maybe_await!(self.alias("unquote", span))?)),
token!(Lexeme::CommaAt, span) => {
Ok(Some(maybe_await!(self.alias("unquote-splicing", span))?))
}
token!(Lexeme::HashQuote, span) => Ok(Some(maybe_await!(self.alias("syntax", span))?)),
token!(Lexeme::HashBackquote, span) => {
Ok(Some(maybe_await!(self.alias("quasisyntax", span))?))
}
token!(Lexeme::HashComma, span) => {
Ok(Some(maybe_await!(self.alias("unsyntax", span))?))
}
token!(Lexeme::HashCommaAt, span) => {
Ok(Some(maybe_await!(self.alias("unsyntax-splicing", span))?))
}
token!(Lexeme::DatumComment) => {
let _ = maybe_await!(self.expression())?;
Ok(None)
}
token!(Lexeme::RParen, span) | token!(Lexeme::RBracket, span) => {
Err(ParseSyntaxError::UnexpectedClosingParen { span })
}
token!(Lexeme::Period, span) => Err(ParseSyntaxError::InvalidPeriodLocation { span }),
}
}
#[maybe_async]
pub fn get_sexpr(&mut self) -> Result<Syntax, ParseSyntaxError> {
loop {
if let Some(expr) = maybe_await!(self.expression())? {
return Ok(expr);
}
}
}
#[maybe_async]
pub fn get_sexpr_or_eof(&mut self) -> Result<Option<Syntax>, ParseSyntaxError> {
loop {
match maybe_await!(self.next_token()) {
Ok(None) => return Ok(None),
Err(err) => return Err(ParseSyntaxError::Lex(err)),
Ok(Some(token)) => self.return_token(token),
}
if let Some(expr) = maybe_await!(self.expression())? {
return Ok(Some(expr));
}
}
}
#[maybe_async]
pub fn all_sexprs(&mut self) -> Result<Syntax, ParseSyntaxError> {
let start_span = self.lexer.curr_span();
let mut sexprs = Vec::new();
loop {
match maybe_await!(self.next_token()) {
Ok(None) => {
let end_span = self.lexer.curr_span();
sexprs.push(Syntax::new_wrapped(Value::null(), end_span));
return Ok(Syntax::List {
list: sexprs,
span: start_span,
});
}
Err(err) => return Err(ParseSyntaxError::Lex(err)),
Ok(Some(token)) => self.return_token(token),
}
sexprs.push(maybe_await!(self.get_sexpr())?);
}
}
#[maybe_async]
fn list(&mut self, span: Span, closing: Lexeme) -> Result<Syntax, ParseSyntaxError> {
match maybe_await!(self.next_token())?.ok_or(ParseSyntaxError::UnexpectedEof)? {
token!(Lexeme::Period) => return maybe_await!(self.get_sexpr()),
token if token.lexeme == closing => {
return Ok(Syntax::new_wrapped(Value::null(), token.span));
}
token => {
self.return_token(token);
}
}
let mut output = Vec::new();
loop {
if let Some(expr) = maybe_await!(self.expression())? {
output.push(expr);
}
match maybe_await!(self.next_token())?.ok_or(ParseSyntaxError::UnexpectedEof)? {
token if token.lexeme == closing => {
output.push(Syntax::new_wrapped(Value::null(), token.span));
return Ok(Syntax::new_list(output, span));
}
token!(Lexeme::Period) => {
let peek1 =
maybe_await!(self.next_token())?.ok_or(ParseSyntaxError::UnexpectedEof)?;
let peek2 =
maybe_await!(self.next_token())?.ok_or(ParseSyntaxError::UnexpectedEof)?;
match (peek1, peek2) {
(token!(Lexeme::LParen, end_span), token!(Lexeme::RParen))
| (token!(Lexeme::LBracket, end_span), token!(Lexeme::RBracket)) => {
output.push(Syntax::new_wrapped(Value::null(), end_span));
return Ok(Syntax::new_list(output, span));
}
(peek1, peek2) => {
self.return_token(peek2);
self.return_token(peek1);
}
}
output.push(maybe_await!(self.get_sexpr())?);
let last =
maybe_await!(self.next_token())?.ok_or(ParseSyntaxError::UnexpectedEof)?;
if last.lexeme == closing {
return Ok(Syntax::new_list(output, span));
} else {
return Err(ParseSyntaxError::ExpectedClosingParen { span: last.span });
}
}
token => self.return_token(token),
}
}
}
#[maybe_async]
fn vector(&mut self, span: Span) -> Result<Syntax, ParseSyntaxError> {
let mut output = Vec::new();
loop {
match maybe_await!(self.next_token())?.ok_or(ParseSyntaxError::UnexpectedEof)? {
token!(Lexeme::RParen) => return Ok(Syntax::new_vector(output, span)),
token => {
self.return_token(token);
if let Some(expr) = maybe_await!(self.expression())? {
output.push(expr);
}
}
}
}
}
#[maybe_async]
fn byte_vector(&mut self, span: Span) -> Result<Syntax, ParseSyntaxError> {
let mut output = Vec::new();
loop {
match maybe_await!(self.next_token())?.ok_or(ParseSyntaxError::UnexpectedEof)? {
token!(Lexeme::Number(num), span) => {
let num: Number = num.try_into()?;
if let Some(simple) = num.as_simple()
&& let Ok(byte) = u8::try_from(simple)
{
output.push(byte);
continue;
}
return Err(ParseSyntaxError::NonByte { span });
}
token!(Lexeme::RParen) => {
return Ok(Syntax::new_wrapped(Value::from(output), span));
}
token => {
return Err(ParseSyntaxError::NonByte { span: token.span });
}
}
}
}
#[maybe_async]
fn alias(&mut self, alias: &str, span: Span) -> Result<Syntax, ParseSyntaxError> {
let expr = maybe_await!(self.get_sexpr())?;
let expr_span = expr.span().clone();
Ok(Syntax::new_list(
vec![
Syntax::new_identifier(alias, span.clone()),
expr,
Syntax::new_wrapped(Value::null(), expr_span),
],
span,
))
}
}
#[derive(Debug)]
pub enum ParseSyntaxError {
UnexpectedEof,
ExpectedClosingParen { span: Span },
UnexpectedClosingParen { span: Span },
InvalidPeriodLocation { span: Span },
NonByte { span: Span },
UnclosedParen { span: Span },
CharTryFrom(CharTryFromError),
Lex(LexerError),
ParseNumberError(ParseNumberError),
UnexpectedToken { token: Box<Token> },
}
impl fmt::Display for ParseSyntaxError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::UnexpectedEof => write!(f, "unexpected end of file"),
Self::ExpectedClosingParen { span } => {
write!(f, "closing parenthesis not found at `{span}`")
}
Self::UnexpectedClosingParen { span } => {
write!(f, "unexpected closing parenthesis found at `{span}`")
}
Self::InvalidPeriodLocation { span } => {
write!(f, "invalid period found at location `{span}`")
}
Self::NonByte { span } => write!(
f,
"non byte value found in byte vector at location `{span}`",
),
Self::UnclosedParen { span } => {
write!(f, "unclosed parenthesis at location `{span}`")
}
Self::CharTryFrom(e) => write!(f, "{e}"),
Self::Lex(e) => write!(f, "{e:?}"),
Self::ParseNumberError(e) => write!(f, "{e:?}"),
Self::UnexpectedToken { token } => {
write!(
f,
"unexpected token {:?} at location `{}`",
token.lexeme, token.span
)
}
}
}
}
impl StdError for ParseSyntaxError {}
impl From<LexerError> for ParseSyntaxError {
fn from(lex: LexerError) -> Self {
Self::Lex(lex)
}
}
impl From<CharTryFromError> for ParseSyntaxError {
fn from(e: CharTryFromError) -> Self {
Self::CharTryFrom(e)
}
}
impl From<ParseNumberError> for ParseSyntaxError {
fn from(e: ParseNumberError) -> Self {
Self::ParseNumberError(e)
}
}