//! `scheme_rs/syntax/parse.rs`: parser that turns lexer tokens into `Syntax` objects.

1use crate::{
2    num::Number,
3    ports::{PortData, PortInfo},
4    syntax::lex::ParseNumberError,
5    value::Value,
6};
7
8pub use super::lex::LexerError;
9use super::{
10    Span, Syntax,
11    lex::{Character, Lexeme, Lexer, Token},
12};
13use scheme_rs_macros::{maybe_async, maybe_await};
14use std::{char::CharTryFromError, error::Error as StdError, fmt};
15
16#[cfg(feature = "async")]
17use futures::future::BoxFuture;
18
19pub struct Parser<'a> {
20    /// We only ever need one token of lookahead probably, but this is more
21    /// obviously correct
22    lookahead: Vec<Token>,
23    lexer: Lexer<'a>,
24}
25
/// Builds a pattern that matches a `Token`.
///
/// * `token!(lexeme_pat)` matches on the lexeme and ignores every other field.
/// * `token!(lexeme_pat, span_pat)` matches on the lexeme and additionally
///   matches (or binds) the token's span.
macro_rules! token {
    ( $lexeme:pat ) => {
        Token {
            lexeme: $lexeme,
            ..
        }
    };
    ( $lexeme:pat, $at:pat ) => {
        Token {
            lexeme: $lexeme,
            span: $at,
        }
    };
}
40
41impl<'a> Parser<'a> {
42    pub(crate) fn new(port_data: &'a mut PortData, port_info: &'a PortInfo, span: Span) -> Self {
43        Parser {
44            lookahead: Vec::new(),
45            lexer: Lexer::new(port_data, port_info, span),
46        }
47    }
48}
49
50impl Parser<'_> {
51    #[maybe_async]
52    fn next_token(&mut self) -> Result<Option<Token>, LexerError> {
53        if let Some(next) = self.lookahead.pop() {
54            Ok(Some(next))
55        } else {
56            maybe_await!(self.lexer.next_token())
57        }
58    }
59
60    pub(crate) fn curr_span(&self) -> Span {
61        self.lexer.curr_span()
62    }
63
64    fn return_token(&mut self, token: Token) {
65        self.lookahead.push(token)
66    }
67
68    #[cfg(feature = "async")]
69    pub fn expression(&mut self) -> BoxFuture<'_, Result<Option<Syntax>, ParseSyntaxError>> {
70        Box::pin(self.expression_inner())
71    }
72
73    #[cfg(not(feature = "async"))]
74    pub fn expression(&mut self) -> Result<Option<Syntax>, ParseSyntaxError> {
75        self.expression_inner()
76    }
77
78    #[maybe_async]
79    fn expression_inner(&mut self) -> Result<Option<Syntax>, ParseSyntaxError> {
80        match maybe_await!(self.next_token())?.ok_or(ParseSyntaxError::UnexpectedEof)? {
81            // Literals:
82            token!(Lexeme::Boolean(b), span) => Ok(Some(Syntax::new_wrapped(Value::from(b), span))),
83            token!(Lexeme::Character(Character::Literal(c)), span) => {
84                Ok(Some(Syntax::new_wrapped(Value::from(c), span)))
85            }
86            token!(Lexeme::Character(Character::Escaped(e)), span) => {
87                Ok(Some(Syntax::new_wrapped(Value::from(char::from(e)), span)))
88            }
89            token!(Lexeme::Character(Character::Unicode(u)), span) => {
90                Ok(Some(Syntax::new_wrapped(
91                    Value::from(char::try_from(u32::from_str_radix(&u, 16).unwrap())?),
92                    span,
93                )))
94            }
95            token!(Lexeme::String(s), span) => Ok(Some(Syntax::new_wrapped(Value::from(s), span))),
96            token!(Lexeme::Number(n), span) => Ok(Some(Syntax::new_wrapped(
97                Value::from(Number::try_from(n)?),
98                span,
99            ))),
100
101            // Identifiers:
102            token!(Lexeme::Identifier(ident), span) => {
103                Ok(Some(Syntax::new_identifier(&ident, span)))
104            }
105
106            // Lists:
107            token!(Lexeme::LParen, span) => {
108                Ok(Some(maybe_await!(self.list(span, Lexeme::RParen))?))
109            }
110            token!(Lexeme::LBracket, span) => {
111                Ok(Some(maybe_await!(self.list(span, Lexeme::RBracket))?))
112            }
113
114            // Vectors:
115            token!(Lexeme::HashParen, span) => Ok(Some(maybe_await!(self.vector(span))?)),
116            token!(Lexeme::Vu8Paren, span) => Ok(Some(maybe_await!(self.byte_vector(span))?)),
117
118            // Various aliases:
119            token!(Lexeme::Quote, span) => Ok(Some(maybe_await!(self.alias("quote", span))?)),
120            token!(Lexeme::Backquote, span) => {
121                Ok(Some(maybe_await!(self.alias("quasiquote", span))?))
122            }
123            token!(Lexeme::Comma, span) => Ok(Some(maybe_await!(self.alias("unquote", span))?)),
124            token!(Lexeme::CommaAt, span) => {
125                Ok(Some(maybe_await!(self.alias("unquote-splicing", span))?))
126            }
127            token!(Lexeme::HashQuote, span) => Ok(Some(maybe_await!(self.alias("syntax", span))?)),
128            token!(Lexeme::HashBackquote, span) => {
129                Ok(Some(maybe_await!(self.alias("quasisyntax", span))?))
130            }
131            token!(Lexeme::HashComma, span) => {
132                Ok(Some(maybe_await!(self.alias("unsyntax", span))?))
133            }
134            token!(Lexeme::HashCommaAt, span) => {
135                Ok(Some(maybe_await!(self.alias("unsyntax-splicing", span))?))
136            }
137
138            // Datum comments:
139            token!(Lexeme::DatumComment) => {
140                // Discard next expression:
141                let _ = maybe_await!(self.expression())?;
142                Ok(None)
143            }
144
145            // Handle some erroneous situations:
146            token!(Lexeme::RParen, span) | token!(Lexeme::RBracket, span) => {
147                Err(ParseSyntaxError::UnexpectedClosingParen { span })
148            }
149
150            token!(Lexeme::Period, span) => Err(ParseSyntaxError::InvalidPeriodLocation { span }),
151        }
152    }
153
154    #[maybe_async]
155    pub fn get_sexpr(&mut self) -> Result<Syntax, ParseSyntaxError> {
156        loop {
157            if let Some(expr) = maybe_await!(self.expression())? {
158                return Ok(expr);
159            }
160        }
161    }
162
163    #[maybe_async]
164    pub fn get_sexpr_or_eof(&mut self) -> Result<Option<Syntax>, ParseSyntaxError> {
165        loop {
166            // Check for EOF
167            match maybe_await!(self.next_token()) {
168                Ok(None) => return Ok(None),
169                Err(err) => return Err(ParseSyntaxError::Lex(err)),
170                Ok(Some(token)) => self.return_token(token),
171            }
172
173            if let Some(expr) = maybe_await!(self.expression())? {
174                return Ok(Some(expr));
175            }
176        }
177    }
178
179    #[maybe_async]
180    pub fn all_sexprs(&mut self) -> Result<Syntax, ParseSyntaxError> {
181        let start_span = self.lexer.curr_span();
182        let mut sexprs = Vec::new();
183        loop {
184            // Check for EOF
185            match maybe_await!(self.next_token()) {
186                Ok(None) => {
187                    let end_span = self.lexer.curr_span();
188                    sexprs.push(Syntax::new_wrapped(Value::null(), end_span));
189                    return Ok(Syntax::List {
190                        list: sexprs,
191                        span: start_span,
192                    });
193                }
194                Err(err) => return Err(ParseSyntaxError::Lex(err)),
195                Ok(Some(token)) => self.return_token(token),
196            }
197
198            sexprs.push(maybe_await!(self.get_sexpr())?);
199        }
200    }
201
202    #[maybe_async]
203    fn list(&mut self, span: Span, closing: Lexeme) -> Result<Syntax, ParseSyntaxError> {
204        match maybe_await!(self.next_token())?.ok_or(ParseSyntaxError::UnexpectedEof)? {
205            // We allow for (. expr) to resolve to expr, just because it's
206            // easier. Maybe we'll disallow this eventualy
207            token!(Lexeme::Period) => return maybe_await!(self.get_sexpr()),
208            // If the first token is a closing paren, then this is an empty
209            // list
210            token if token.lexeme == closing => {
211                return Ok(Syntax::new_wrapped(Value::null(), token.span));
212            }
213            // Otherwise, push the token back and continue
214            token => {
215                self.return_token(token);
216            }
217        }
218
219        let mut output = Vec::new();
220        loop {
221            if let Some(expr) = maybe_await!(self.expression())? {
222                output.push(expr);
223            }
224            match maybe_await!(self.next_token())?.ok_or(ParseSyntaxError::UnexpectedEof)? {
225                token if token.lexeme == closing => {
226                    output.push(Syntax::new_wrapped(Value::null(), token.span));
227                    return Ok(Syntax::new_list(output, span));
228                }
229                token!(Lexeme::Period) => {
230                    let peek1 =
231                        maybe_await!(self.next_token())?.ok_or(ParseSyntaxError::UnexpectedEof)?;
232                    let peek2 =
233                        maybe_await!(self.next_token())?.ok_or(ParseSyntaxError::UnexpectedEof)?;
234                    match (peek1, peek2) {
235                        // Proper list with period:
236                        (token!(Lexeme::LParen, end_span), token!(Lexeme::RParen))
237                        | (token!(Lexeme::LBracket, end_span), token!(Lexeme::RBracket)) => {
238                            output.push(Syntax::new_wrapped(Value::null(), end_span));
239                            return Ok(Syntax::new_list(output, span));
240                        }
241                        // Improper list:
242                        (peek1, peek2) => {
243                            self.return_token(peek2);
244                            self.return_token(peek1);
245                        }
246                    }
247                    output.push(maybe_await!(self.get_sexpr())?);
248                    let last =
249                        maybe_await!(self.next_token())?.ok_or(ParseSyntaxError::UnexpectedEof)?;
250                    if last.lexeme == closing {
251                        return Ok(Syntax::new_list(output, span));
252                    } else {
253                        return Err(ParseSyntaxError::ExpectedClosingParen { span: last.span });
254                    }
255                }
256                token => self.return_token(token),
257            }
258        }
259    }
260
261    #[maybe_async]
262    fn vector(&mut self, span: Span) -> Result<Syntax, ParseSyntaxError> {
263        let mut output = Vec::new();
264        loop {
265            match maybe_await!(self.next_token())?.ok_or(ParseSyntaxError::UnexpectedEof)? {
266                token!(Lexeme::RParen) => return Ok(Syntax::new_vector(output, span)),
267                token => {
268                    self.return_token(token);
269                    if let Some(expr) = maybe_await!(self.expression())? {
270                        output.push(expr);
271                    }
272                }
273            }
274        }
275    }
276
277    #[maybe_async]
278    fn byte_vector(&mut self, span: Span) -> Result<Syntax, ParseSyntaxError> {
279        let mut output = Vec::new();
280        loop {
281            match maybe_await!(self.next_token())?.ok_or(ParseSyntaxError::UnexpectedEof)? {
282                token!(Lexeme::Number(num), span) => {
283                    let num: Number = num.try_into()?;
284                    if let Some(simple) = num.as_simple()
285                        && let Ok(byte) = u8::try_from(simple)
286                    {
287                        output.push(byte);
288                        continue;
289                    }
290                    return Err(ParseSyntaxError::NonByte { span });
291                }
292                token!(Lexeme::RParen) => {
293                    return Ok(Syntax::new_wrapped(Value::from(output), span));
294                }
295                token => {
296                    return Err(ParseSyntaxError::NonByte { span: token.span });
297                }
298            }
299        }
300    }
301
302    #[maybe_async]
303    fn alias(&mut self, alias: &str, span: Span) -> Result<Syntax, ParseSyntaxError> {
304        let expr = maybe_await!(self.get_sexpr())?;
305        let expr_span = expr.span().clone();
306        Ok(Syntax::new_list(
307            vec![
308                Syntax::new_identifier(alias, span.clone()),
309                expr,
310                Syntax::new_wrapped(Value::null(), expr_span),
311            ],
312            span,
313        ))
314    }
315}
316
317#[derive(Debug)]
318pub enum ParseSyntaxError {
319    UnexpectedEof,
320    ExpectedClosingParen { span: Span },
321    UnexpectedClosingParen { span: Span },
322    InvalidPeriodLocation { span: Span },
323    NonByte { span: Span },
324    UnclosedParen { span: Span },
325    CharTryFrom(CharTryFromError),
326    Lex(LexerError),
327    ParseNumberError(ParseNumberError),
328    UnexpectedToken { token: Box<Token> },
329}
330
331impl fmt::Display for ParseSyntaxError {
332    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
333        match self {
334            // Self::EmptyInput => write!(f, "cannot parse an empty list"),
335            Self::UnexpectedEof => write!(f, "unexpected end of file"),
336            Self::ExpectedClosingParen { span } => {
337                write!(f, "closing parenthesis not found at `{span}`")
338            }
339            Self::UnexpectedClosingParen { span } => {
340                write!(f, "unexpected closing parenthesis found at `{span}`")
341            }
342            Self::InvalidPeriodLocation { span } => {
343                write!(f, "invalid period found at location `{span}`")
344            }
345            Self::NonByte { span } => write!(
346                f,
347                "non byte value found in byte vector at location `{span}`",
348            ),
349            Self::UnclosedParen { span } => {
350                write!(f, "unclosed parenthesis at location `{span}`")
351            }
352            Self::CharTryFrom(e) => write!(f, "{e}"),
353            Self::Lex(e) => write!(f, "{e:?}"),
354            Self::ParseNumberError(e) => write!(f, "{e:?}"),
355            Self::UnexpectedToken { token } => {
356                write!(
357                    f,
358                    "unexpected token {:?} at location `{}`",
359                    token.lexeme, token.span
360                )
361            }
362        }
363    }
364}
365impl StdError for ParseSyntaxError {}
366
367impl From<LexerError> for ParseSyntaxError {
368    fn from(lex: LexerError) -> Self {
369        Self::Lex(lex)
370    }
371}
372
373impl From<CharTryFromError> for ParseSyntaxError {
374    fn from(e: CharTryFromError) -> Self {
375        Self::CharTryFrom(e)
376    }
377}
378
379impl From<ParseNumberError> for ParseSyntaxError {
380    fn from(e: ParseNumberError) -> Self {
381        Self::ParseNumberError(e)
382    }
383}