1use crate::{
2 num::Number,
4 ports::{PortData, PortInfo},
5 syntax::lex::ParseNumberError,
6 value::Value,
7};
8
9pub use super::lex::LexerError;
10use super::{
11 Span, Syntax,
12 lex::{Character, Lexeme, Lexer, Token},
13};
14use scheme_rs_macros::{maybe_async, maybe_await};
15use std::{char::CharTryFromError, error::Error as StdError, fmt};
16
17#[cfg(feature = "async")]
18use futures::future::BoxFuture;
19
/// Token-level parser that turns the output of a [`Lexer`] into [`Syntax`] values.
pub struct Parser<'a> {
    /// Tokens that were read and then handed back with `return_token`.
    /// `next_token` pops from the end of this stack before asking the lexer
    /// for anything new, so the most recently returned token is read first.
    lookahead: Vec<Token>,
    /// Source of fresh tokens once `lookahead` is empty.
    lexer: Lexer<'a>,
}
26
/// Expands to a [`Token`] struct pattern.
///
/// * `token!(lexeme, span)` matches the lexeme and matches/binds the span.
/// * `token!(lexeme)` matches the lexeme and ignores every other field.
macro_rules! token {
    ( $lexeme:pat, $location:pat ) => {
        Token {
            lexeme: $lexeme,
            span: $location,
        }
    };
    ( $lexeme:pat ) => {
        Token {
            lexeme: $lexeme,
            ..
        }
    };
}
41
42impl<'a> Parser<'a> {
43 pub(crate) fn new(port_data: &'a mut PortData, port_info: &'a PortInfo, span: Span) -> Self {
44 Parser {
45 lookahead: Vec::new(),
46 lexer: Lexer::new(port_data, port_info, span),
47 }
48 }
49}
50
51impl Parser<'_> {
52 #[maybe_async]
53 fn next_token(&mut self) -> Result<Option<Token>, LexerError> {
54 if let Some(next) = self.lookahead.pop() {
55 Ok(Some(next))
56 } else {
57 maybe_await!(self.lexer.next_token())
58 }
59 }
60
    /// Returns the span currently reported by the underlying lexer.
    ///
    /// Tokens sitting in the lookahead stack are not taken into account; the
    /// value comes straight from `Lexer::curr_span`.
    pub(crate) fn curr_span(&self) -> Span {
        self.lexer.curr_span()
    }
64
    /// Hands a token back to the parser so that the next `next_token` call
    /// yields it again. Tokens are returned in LIFO order: the last token
    /// pushed here is the first one read.
    fn return_token(&mut self, token: Token) {
        self.lookahead.push(token)
    }
68
    /// Async build: reads one datum by delegating to `expression_inner`.
    ///
    /// The future is boxed; presumably this is what keeps the future type
    /// finite, since `expression_inner` reaches back into `expression`
    /// recursively.
    ///
    /// Resolves to `Ok(None)` when only a datum comment was consumed.
    #[cfg(feature = "async")]
    pub fn expression(&mut self) -> BoxFuture<'_, Result<Option<Syntax>, ParseSyntaxError>> {
        Box::pin(self.expression_inner())
    }
73
    /// Sync build: reads one datum by delegating to `expression_inner`.
    ///
    /// Returns `Ok(None)` when only a datum comment was consumed.
    #[cfg(not(feature = "async"))]
    pub fn expression(&mut self) -> Result<Option<Syntax>, ParseSyntaxError> {
        self.expression_inner()
    }
78
79 #[maybe_async]
80 fn expression_inner(&mut self) -> Result<Option<Syntax>, ParseSyntaxError> {
81 match maybe_await!(self.next_token())?.ok_or(ParseSyntaxError::UnexpectedEof)? {
82 token!(Lexeme::Boolean(b), span) => Ok(Some(Syntax::new_wrapped(Value::from(b), span))),
84 token!(Lexeme::Character(Character::Literal(c)), span) => {
85 Ok(Some(Syntax::new_wrapped(Value::from(c), span)))
86 }
87 token!(Lexeme::Character(Character::Escaped(e)), span) => {
88 Ok(Some(Syntax::new_wrapped(Value::from(char::from(e)), span)))
89 }
90 token!(Lexeme::Character(Character::Unicode(u)), span) => {
91 Ok(Some(Syntax::new_wrapped(
92 Value::from(char::try_from(u32::from_str_radix(&u, 16).unwrap())?),
93 span,
94 )))
95 }
96 token!(Lexeme::String(s), span) => Ok(Some(Syntax::new_wrapped(Value::from(s), span))),
97 token!(Lexeme::Number(n), span) => Ok(Some(Syntax::new_wrapped(
98 Value::from(Number::try_from(n)?),
99 span,
100 ))),
101
102 token!(Lexeme::Identifier(ident), span) => {
104 Ok(Some(Syntax::new_identifier(&ident, span)))
105 }
106
107 token!(Lexeme::LParen, span) => {
109 Ok(Some(maybe_await!(self.list(span, Lexeme::RParen))?))
110 }
111 token!(Lexeme::LBracket, span) => {
112 Ok(Some(maybe_await!(self.list(span, Lexeme::RBracket))?))
113 }
114
115 token!(Lexeme::HashParen, span) => Ok(Some(maybe_await!(self.vector(span))?)),
117 token!(Lexeme::Vu8Paren, span) => Ok(Some(maybe_await!(self.byte_vector(span))?)),
118
119 token!(Lexeme::Quote, span) => Ok(Some(maybe_await!(self.alias("quote", span))?)),
121 token!(Lexeme::Backquote, span) => {
122 Ok(Some(maybe_await!(self.alias("quasiquote", span))?))
123 }
124 token!(Lexeme::Comma, span) => Ok(Some(maybe_await!(self.alias("unquote", span))?)),
125 token!(Lexeme::CommaAt, span) => {
126 Ok(Some(maybe_await!(self.alias("unquote-splicing", span))?))
127 }
128 token!(Lexeme::HashQuote, span) => Ok(Some(maybe_await!(self.alias("syntax", span))?)),
129 token!(Lexeme::HashBackquote, span) => {
130 Ok(Some(maybe_await!(self.alias("quasisyntax", span))?))
131 }
132 token!(Lexeme::HashComma, span) => {
133 Ok(Some(maybe_await!(self.alias("unsyntax", span))?))
134 }
135 token!(Lexeme::HashCommaAt, span) => {
136 Ok(Some(maybe_await!(self.alias("unsyntax-splicing", span))?))
137 }
138
139 token!(Lexeme::DatumComment) => {
141 let _ = maybe_await!(self.expression())?;
143 Ok(None)
144 }
145
146 token!(Lexeme::RParen, span) | token!(Lexeme::RBracket, span) => {
148 Err(ParseSyntaxError::UnexpectedClosingParen { span })
149 }
150
151 token!(Lexeme::Period, span) => Err(ParseSyntaxError::InvalidPeriodLocation { span }),
152 }
153 }
154
155 #[maybe_async]
156 pub fn get_sexpr(&mut self) -> Result<Syntax, ParseSyntaxError> {
157 loop {
158 if let Some(expr) = maybe_await!(self.expression())? {
159 return Ok(expr);
160 }
161 }
162 }
163
164 #[maybe_async]
165 pub fn get_sexpr_or_eof(&mut self) -> Result<Option<Syntax>, ParseSyntaxError> {
166 loop {
167 match maybe_await!(self.next_token()) {
169 Ok(None) => return Ok(None),
170 Err(err) => return Err(ParseSyntaxError::Lex(err)),
171 Ok(Some(token)) => self.return_token(token),
172 }
173
174 if let Some(expr) = maybe_await!(self.expression())? {
175 return Ok(Some(expr));
176 }
177 }
178 }
179
180 #[maybe_async]
181 pub fn all_sexprs(&mut self) -> Result<Syntax, ParseSyntaxError> {
182 let start_span = self.lexer.curr_span();
183 let mut sexprs = Vec::new();
184 loop {
185 match maybe_await!(self.next_token()) {
187 Ok(None) => {
188 let end_span = self.lexer.curr_span();
189 sexprs.push(Syntax::new_wrapped(Value::null(), end_span));
190 return Ok(Syntax::List {
191 list: sexprs,
192 span: start_span,
193 });
194 }
195 Err(err) => return Err(ParseSyntaxError::Lex(err)),
196 Ok(Some(token)) => self.return_token(token),
197 }
198
199 sexprs.push(maybe_await!(self.get_sexpr())?);
200 }
201 }
202
203 #[maybe_async]
204 fn list(&mut self, span: Span, closing: Lexeme) -> Result<Syntax, ParseSyntaxError> {
205 match maybe_await!(self.next_token())?.ok_or(ParseSyntaxError::UnexpectedEof)? {
206 token!(Lexeme::Period) => return maybe_await!(self.get_sexpr()),
209 token if token.lexeme == closing => {
212 return Ok(Syntax::new_wrapped(Value::null(), token.span));
213 }
214 token => {
216 self.return_token(token);
217 }
218 }
219
220 let mut output = Vec::new();
221 loop {
222 if let Some(expr) = maybe_await!(self.expression())? {
223 output.push(expr);
224 }
225 match maybe_await!(self.next_token())?.ok_or(ParseSyntaxError::UnexpectedEof)? {
226 token if token.lexeme == closing => {
227 output.push(Syntax::new_wrapped(Value::null(), token.span));
228 return Ok(Syntax::new_list(output, span));
229 }
230 token!(Lexeme::Period) => {
231 let peek1 =
232 maybe_await!(self.next_token())?.ok_or(ParseSyntaxError::UnexpectedEof)?;
233 let peek2 =
234 maybe_await!(self.next_token())?.ok_or(ParseSyntaxError::UnexpectedEof)?;
235 match (peek1, peek2) {
236 (token!(Lexeme::LParen, end_span), token!(Lexeme::RParen))
238 | (token!(Lexeme::LBracket, end_span), token!(Lexeme::RBracket)) => {
239 output.push(Syntax::new_wrapped(Value::null(), end_span));
240 return Ok(Syntax::new_list(output, span));
241 }
242 (peek1, peek2) => {
244 self.return_token(peek2);
245 self.return_token(peek1);
246 }
247 }
248 output.push(maybe_await!(self.get_sexpr())?);
249 let last =
250 maybe_await!(self.next_token())?.ok_or(ParseSyntaxError::UnexpectedEof)?;
251 if last.lexeme == closing {
252 return Ok(Syntax::new_list(output, span));
253 } else {
254 return Err(ParseSyntaxError::ExpectedClosingParen { span: last.span });
255 }
256 }
257 token => self.return_token(token),
258 }
259 }
260 }
261
262 #[maybe_async]
263 fn vector(&mut self, span: Span) -> Result<Syntax, ParseSyntaxError> {
264 let mut output = Vec::new();
265 loop {
266 match maybe_await!(self.next_token())?.ok_or(ParseSyntaxError::UnexpectedEof)? {
267 token!(Lexeme::RParen) => return Ok(Syntax::new_vector(output, span)),
268 token => {
269 self.return_token(token);
270 if let Some(expr) = maybe_await!(self.expression())? {
271 output.push(expr);
272 }
273 }
274 }
275 }
276 }
277
278 #[maybe_async]
279 fn byte_vector(&mut self, span: Span) -> Result<Syntax, ParseSyntaxError> {
280 let mut output = Vec::new();
281 loop {
282 match maybe_await!(self.next_token())?.ok_or(ParseSyntaxError::UnexpectedEof)? {
283 token!(Lexeme::Number(num), span) => {
284 let num: Number = num.try_into()?;
285 if let Some(simple) = num.as_simple()
286 && let Ok(byte) = u8::try_from(simple)
287 {
288 output.push(byte);
289 continue;
290 }
291 return Err(ParseSyntaxError::NonByte { span });
292 }
293 token!(Lexeme::RParen) => {
294 return Ok(Syntax::new_wrapped(Value::from(output), span));
295 }
296 token => {
297 return Err(ParseSyntaxError::NonByte { span: token.span });
298 }
299 }
300 }
301 }
302
303 #[maybe_async]
304 fn alias(&mut self, alias: &str, span: Span) -> Result<Syntax, ParseSyntaxError> {
305 let expr = maybe_await!(self.get_sexpr())?;
306 let expr_span = expr.span().clone();
307 Ok(Syntax::new_list(
308 vec![
309 Syntax::new_identifier(alias, span.clone()),
310 expr,
311 Syntax::new_wrapped(Value::null(), expr_span),
312 ],
313 span,
314 ))
315 }
316}
317
/// Errors produced while turning tokens into [`Syntax`].
#[derive(Debug)]
pub enum ParseSyntaxError {
    /// The token stream ended where another token was required.
    UnexpectedEof,
    /// A list was not closed where its closing delimiter was required;
    /// `span` locates the token found instead.
    ExpectedClosingParen { span: Span },
    /// A closing paren or bracket appeared where a datum was expected.
    UnexpectedClosingParen { span: Span },
    /// A period appeared where a datum was expected.
    InvalidPeriodLocation { span: Span },
    /// A byte vector literal contained something that is not a number
    /// convertible to `u8`.
    NonByte { span: Span },
    /// An opening parenthesis was never closed.
    /// NOTE(review): not constructed by the parser code in this part of the
    /// file; confirm whether other code still produces it.
    UnclosedParen { span: Span },
    /// A hex character escape did not denote a valid `char`.
    CharTryFrom(CharTryFromError),
    /// The lexer failed.
    Lex(LexerError),
    /// A numeric literal could not be converted into a `Number`.
    ParseNumberError(ParseNumberError),
    /// A token appeared where it is not allowed.
    /// NOTE(review): not constructed by the parser code in this part of the
    /// file; confirm whether other code still produces it.
    UnexpectedToken { token: Box<Token> },
}
331
332impl fmt::Display for ParseSyntaxError {
333 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
334 match self {
335 Self::UnexpectedEof => write!(f, "unexpected end of file"),
337 Self::ExpectedClosingParen { span } => {
338 write!(f, "closing parenthesis not found at `{span}`")
339 }
340 Self::UnexpectedClosingParen { span } => {
341 write!(f, "unexpected closing parenthesis found at `{span}`")
342 }
343 Self::InvalidPeriodLocation { span } => {
344 write!(f, "invalid period found at location `{span}`")
345 }
346 Self::NonByte { span } => write!(
347 f,
348 "non byte value found in byte vector at location `{span}`",
349 ),
350 Self::UnclosedParen { span } => {
351 write!(f, "unclosed parenthesis at location `{span}`")
352 }
353 Self::CharTryFrom(e) => write!(f, "{e}"),
354 Self::Lex(e) => write!(f, "{e:?}"),
355 Self::ParseNumberError(e) => write!(f, "{e:?}"),
356 Self::UnexpectedToken { token } => {
357 write!(
358 f,
359 "unexpected token {:?} at location `{}`",
360 token.lexeme, token.span
361 )
362 }
363 }
364 }
365}
// Marks `ParseSyntaxError` as a standard error type. No `source()` override is
// provided, so the default trait methods apply.
impl StdError for ParseSyntaxError {}
367
368impl From<LexerError> for ParseSyntaxError {
369 fn from(lex: LexerError) -> Self {
370 Self::Lex(lex)
371 }
372}
373
374impl From<CharTryFromError> for ParseSyntaxError {
375 fn from(e: CharTryFromError) -> Self {
376 Self::CharTryFrom(e)
377 }
378}
379
380impl From<ParseNumberError> for ParseSyntaxError {
381 fn from(e: ParseNumberError) -> Self {
382 Self::ParseNumberError(e)
383 }
384}