Skip to main content

scheme_rs/syntax/
parse.rs

1use crate::{
2    // ast::Literal,
3    num::Number,
4    ports::{PortData, PortInfo},
5    syntax::lex::ParseNumberError,
6    value::Value,
7};
8
9pub use super::lex::LexerError;
10use super::{
11    Span, Syntax,
12    lex::{Character, Lexeme, Lexer, Token},
13};
14use scheme_rs_macros::{maybe_async, maybe_await};
15use std::{char::CharTryFromError, error::Error as StdError, fmt};
16
17#[cfg(feature = "async")]
18use futures::future::BoxFuture;
19
20pub struct Parser<'a> {
21    /// We only ever need one token of lookahead probably, but this is more
22    /// obviously correct
23    lookahead: Vec<Token>,
24    lexer: Lexer<'a>,
25}
26
/// Builds a pattern that matches a `Token`.
///
/// * `token!(lexeme_pat)` matches on the lexeme and ignores every other field.
/// * `token!(lexeme_pat, span_pat)` additionally matches/binds the span.
macro_rules! token {
    ( $lexeme:pat ) => {
        Token {
            lexeme: $lexeme,
            ..
        }
    };
    ( $lexeme:pat, $span_pat:pat ) => {
        Token {
            lexeme: $lexeme,
            span: $span_pat,
        }
    };
}
41
42impl<'a> Parser<'a> {
43    pub(crate) fn new(port_data: &'a mut PortData, port_info: &'a PortInfo, span: Span) -> Self {
44        Parser {
45            lookahead: Vec::new(),
46            lexer: Lexer::new(port_data, port_info, span),
47        }
48    }
49}
50
51impl Parser<'_> {
52    #[maybe_async]
53    fn next_token(&mut self) -> Result<Option<Token>, LexerError> {
54        if let Some(next) = self.lookahead.pop() {
55            Ok(Some(next))
56        } else {
57            maybe_await!(self.lexer.next_token())
58        }
59    }
60
61    pub(crate) fn curr_span(&self) -> Span {
62        self.lexer.curr_span()
63    }
64
65    fn return_token(&mut self, token: Token) {
66        self.lookahead.push(token)
67    }
68
69    #[cfg(feature = "async")]
70    pub fn expression(&mut self) -> BoxFuture<'_, Result<Option<Syntax>, ParseSyntaxError>> {
71        Box::pin(self.expression_inner())
72    }
73
74    #[cfg(not(feature = "async"))]
75    pub fn expression(&mut self) -> Result<Option<Syntax>, ParseSyntaxError> {
76        self.expression_inner()
77    }
78
79    #[maybe_async]
80    fn expression_inner(&mut self) -> Result<Option<Syntax>, ParseSyntaxError> {
81        match maybe_await!(self.next_token())?.ok_or(ParseSyntaxError::UnexpectedEof)? {
82            // Literals:
83            token!(Lexeme::Boolean(b), span) => Ok(Some(Syntax::new_wrapped(Value::from(b), span))),
84            token!(Lexeme::Character(Character::Literal(c)), span) => {
85                Ok(Some(Syntax::new_wrapped(Value::from(c), span)))
86            }
87            token!(Lexeme::Character(Character::Escaped(e)), span) => {
88                Ok(Some(Syntax::new_wrapped(Value::from(char::from(e)), span)))
89            }
90            token!(Lexeme::Character(Character::Unicode(u)), span) => {
91                Ok(Some(Syntax::new_wrapped(
92                    Value::from(char::try_from(u32::from_str_radix(&u, 16).unwrap())?),
93                    span,
94                )))
95            }
96            token!(Lexeme::String(s), span) => Ok(Some(Syntax::new_wrapped(Value::from(s), span))),
97            token!(Lexeme::Number(n), span) => Ok(Some(Syntax::new_wrapped(
98                Value::from(Number::try_from(n)?),
99                span,
100            ))),
101
102            // Identifiers:
103            token!(Lexeme::Identifier(ident), span) => {
104                Ok(Some(Syntax::new_identifier(&ident, span)))
105            }
106
107            // Lists:
108            token!(Lexeme::LParen, span) => {
109                Ok(Some(maybe_await!(self.list(span, Lexeme::RParen))?))
110            }
111            token!(Lexeme::LBracket, span) => {
112                Ok(Some(maybe_await!(self.list(span, Lexeme::RBracket))?))
113            }
114
115            // Vectors:
116            token!(Lexeme::HashParen, span) => Ok(Some(maybe_await!(self.vector(span))?)),
117            token!(Lexeme::Vu8Paren, span) => Ok(Some(maybe_await!(self.byte_vector(span))?)),
118
119            // Various aliases:
120            token!(Lexeme::Quote, span) => Ok(Some(maybe_await!(self.alias("quote", span))?)),
121            token!(Lexeme::Backquote, span) => {
122                Ok(Some(maybe_await!(self.alias("quasiquote", span))?))
123            }
124            token!(Lexeme::Comma, span) => Ok(Some(maybe_await!(self.alias("unquote", span))?)),
125            token!(Lexeme::CommaAt, span) => {
126                Ok(Some(maybe_await!(self.alias("unquote-splicing", span))?))
127            }
128            token!(Lexeme::HashQuote, span) => Ok(Some(maybe_await!(self.alias("syntax", span))?)),
129            token!(Lexeme::HashBackquote, span) => {
130                Ok(Some(maybe_await!(self.alias("quasisyntax", span))?))
131            }
132            token!(Lexeme::HashComma, span) => {
133                Ok(Some(maybe_await!(self.alias("unsyntax", span))?))
134            }
135            token!(Lexeme::HashCommaAt, span) => {
136                Ok(Some(maybe_await!(self.alias("unsyntax-splicing", span))?))
137            }
138
139            // Datum comments:
140            token!(Lexeme::DatumComment) => {
141                // Discard next expression:
142                let _ = maybe_await!(self.expression())?;
143                Ok(None)
144            }
145
146            // Handle some erroneous situations:
147            token!(Lexeme::RParen, span) | token!(Lexeme::RBracket, span) => {
148                Err(ParseSyntaxError::UnexpectedClosingParen { span })
149            }
150
151            token!(Lexeme::Period, span) => Err(ParseSyntaxError::InvalidPeriodLocation { span }),
152        }
153    }
154
155    #[maybe_async]
156    pub fn get_sexpr(&mut self) -> Result<Syntax, ParseSyntaxError> {
157        loop {
158            if let Some(expr) = maybe_await!(self.expression())? {
159                return Ok(expr);
160            }
161        }
162    }
163
164    #[maybe_async]
165    pub fn get_sexpr_or_eof(&mut self) -> Result<Option<Syntax>, ParseSyntaxError> {
166        loop {
167            // Check for EOF
168            match maybe_await!(self.next_token()) {
169                Ok(None) => return Ok(None),
170                Err(err) => return Err(ParseSyntaxError::Lex(err)),
171                Ok(Some(token)) => self.return_token(token),
172            }
173
174            if let Some(expr) = maybe_await!(self.expression())? {
175                return Ok(Some(expr));
176            }
177        }
178    }
179
180    #[maybe_async]
181    pub fn all_sexprs(&mut self) -> Result<Syntax, ParseSyntaxError> {
182        let start_span = self.lexer.curr_span();
183        let mut sexprs = Vec::new();
184        loop {
185            // Check for EOF
186            match maybe_await!(self.next_token()) {
187                Ok(None) => {
188                    let end_span = self.lexer.curr_span();
189                    sexprs.push(Syntax::new_wrapped(Value::null(), end_span));
190                    return Ok(Syntax::List {
191                        list: sexprs,
192                        span: start_span,
193                    });
194                }
195                Err(err) => return Err(ParseSyntaxError::Lex(err)),
196                Ok(Some(token)) => self.return_token(token),
197            }
198
199            sexprs.push(maybe_await!(self.get_sexpr())?);
200        }
201    }
202
203    #[maybe_async]
204    fn list(&mut self, span: Span, closing: Lexeme) -> Result<Syntax, ParseSyntaxError> {
205        match maybe_await!(self.next_token())?.ok_or(ParseSyntaxError::UnexpectedEof)? {
206            // We allow for (. expr) to resolve to expr, just because it's
207            // easier. Maybe we'll disallow this eventualy
208            token!(Lexeme::Period) => return maybe_await!(self.get_sexpr()),
209            // If the first token is a closing paren, then this is an empty
210            // list
211            token if token.lexeme == closing => {
212                return Ok(Syntax::new_wrapped(Value::null(), token.span));
213            }
214            // Otherwise, push the token back and continue
215            token => {
216                self.return_token(token);
217            }
218        }
219
220        let mut output = Vec::new();
221        loop {
222            if let Some(expr) = maybe_await!(self.expression())? {
223                output.push(expr);
224            }
225            match maybe_await!(self.next_token())?.ok_or(ParseSyntaxError::UnexpectedEof)? {
226                token if token.lexeme == closing => {
227                    output.push(Syntax::new_wrapped(Value::null(), token.span));
228                    return Ok(Syntax::new_list(output, span));
229                }
230                token!(Lexeme::Period) => {
231                    let peek1 =
232                        maybe_await!(self.next_token())?.ok_or(ParseSyntaxError::UnexpectedEof)?;
233                    let peek2 =
234                        maybe_await!(self.next_token())?.ok_or(ParseSyntaxError::UnexpectedEof)?;
235                    match (peek1, peek2) {
236                        // Proper list with period:
237                        (token!(Lexeme::LParen, end_span), token!(Lexeme::RParen))
238                        | (token!(Lexeme::LBracket, end_span), token!(Lexeme::RBracket)) => {
239                            output.push(Syntax::new_wrapped(Value::null(), end_span));
240                            return Ok(Syntax::new_list(output, span));
241                        }
242                        // Improper list:
243                        (peek1, peek2) => {
244                            self.return_token(peek2);
245                            self.return_token(peek1);
246                        }
247                    }
248                    output.push(maybe_await!(self.get_sexpr())?);
249                    let last =
250                        maybe_await!(self.next_token())?.ok_or(ParseSyntaxError::UnexpectedEof)?;
251                    if last.lexeme == closing {
252                        return Ok(Syntax::new_list(output, span));
253                    } else {
254                        return Err(ParseSyntaxError::ExpectedClosingParen { span: last.span });
255                    }
256                }
257                token => self.return_token(token),
258            }
259        }
260    }
261
262    #[maybe_async]
263    fn vector(&mut self, span: Span) -> Result<Syntax, ParseSyntaxError> {
264        let mut output = Vec::new();
265        loop {
266            match maybe_await!(self.next_token())?.ok_or(ParseSyntaxError::UnexpectedEof)? {
267                token!(Lexeme::RParen) => return Ok(Syntax::new_vector(output, span)),
268                token => {
269                    self.return_token(token);
270                    if let Some(expr) = maybe_await!(self.expression())? {
271                        output.push(expr);
272                    }
273                }
274            }
275        }
276    }
277
278    #[maybe_async]
279    fn byte_vector(&mut self, span: Span) -> Result<Syntax, ParseSyntaxError> {
280        let mut output = Vec::new();
281        loop {
282            match maybe_await!(self.next_token())?.ok_or(ParseSyntaxError::UnexpectedEof)? {
283                token!(Lexeme::Number(num), span) => {
284                    let num: Number = num.try_into()?;
285                    if let Some(simple) = num.as_simple()
286                        && let Ok(byte) = u8::try_from(simple)
287                    {
288                        output.push(byte);
289                        continue;
290                    }
291                    return Err(ParseSyntaxError::NonByte { span });
292                }
293                token!(Lexeme::RParen) => {
294                    return Ok(Syntax::new_wrapped(Value::from(output), span));
295                }
296                token => {
297                    return Err(ParseSyntaxError::NonByte { span: token.span });
298                }
299            }
300        }
301    }
302
303    #[maybe_async]
304    fn alias(&mut self, alias: &str, span: Span) -> Result<Syntax, ParseSyntaxError> {
305        let expr = maybe_await!(self.get_sexpr())?;
306        let expr_span = expr.span().clone();
307        Ok(Syntax::new_list(
308            vec![
309                Syntax::new_identifier(alias, span.clone()),
310                expr,
311                Syntax::new_wrapped(Value::null(), expr_span),
312            ],
313            span,
314        ))
315    }
316}
317
318#[derive(Debug)]
319pub enum ParseSyntaxError {
320    UnexpectedEof,
321    ExpectedClosingParen { span: Span },
322    UnexpectedClosingParen { span: Span },
323    InvalidPeriodLocation { span: Span },
324    NonByte { span: Span },
325    UnclosedParen { span: Span },
326    CharTryFrom(CharTryFromError),
327    Lex(LexerError),
328    ParseNumberError(ParseNumberError),
329    UnexpectedToken { token: Box<Token> },
330}
331
332impl fmt::Display for ParseSyntaxError {
333    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
334        match self {
335            // Self::EmptyInput => write!(f, "cannot parse an empty list"),
336            Self::UnexpectedEof => write!(f, "unexpected end of file"),
337            Self::ExpectedClosingParen { span } => {
338                write!(f, "closing parenthesis not found at `{span}`")
339            }
340            Self::UnexpectedClosingParen { span } => {
341                write!(f, "unexpected closing parenthesis found at `{span}`")
342            }
343            Self::InvalidPeriodLocation { span } => {
344                write!(f, "invalid period found at location `{span}`")
345            }
346            Self::NonByte { span } => write!(
347                f,
348                "non byte value found in byte vector at location `{span}`",
349            ),
350            Self::UnclosedParen { span } => {
351                write!(f, "unclosed parenthesis at location `{span}`")
352            }
353            Self::CharTryFrom(e) => write!(f, "{e}"),
354            Self::Lex(e) => write!(f, "{e:?}"),
355            Self::ParseNumberError(e) => write!(f, "{e:?}"),
356            Self::UnexpectedToken { token } => {
357                write!(
358                    f,
359                    "unexpected token {:?} at location `{}`",
360                    token.lexeme, token.span
361                )
362            }
363        }
364    }
365}
366impl StdError for ParseSyntaxError {}
367
368impl From<LexerError> for ParseSyntaxError {
369    fn from(lex: LexerError) -> Self {
370        Self::Lex(lex)
371    }
372}
373
374impl From<CharTryFromError> for ParseSyntaxError {
375    fn from(e: CharTryFromError) -> Self {
376        Self::CharTryFrom(e)
377    }
378}
379
380impl From<ParseNumberError> for ParseSyntaxError {
381    fn from(e: ParseNumberError) -> Self {
382        Self::ParseNumberError(e)
383    }
384}