1use crate::{
2 num::Number,
3 ports::{PortData, PortInfo},
4 syntax::lex::ParseNumberError,
5 value::Value,
6};
7
8pub use super::lex::LexerError;
9use super::{
10 Span, Syntax,
11 lex::{Character, Lexeme, Lexer, Token},
12};
13use scheme_rs_macros::{maybe_async, maybe_await};
14use std::{char::CharTryFromError, error::Error as StdError, fmt};
15
16#[cfg(feature = "async")]
17use futures::future::BoxFuture;
18
19pub struct Parser<'a> {
20 lookahead: Vec<Token>,
23 lexer: Lexer<'a>,
24}
25
/// Expands to a pattern that matches a `Token`.
///
/// * `token!(lexeme_pat, span_pat)` matches the lexeme against `lexeme_pat`
///   and the span against `span_pat` (typically a binding).
/// * `token!(lexeme_pat)` matches on the lexeme only and ignores the rest of
///   the token.
macro_rules! token {
    ( $lexeme:pat, $at:pat ) => {
        Token {
            lexeme: $lexeme,
            span: $at,
        }
    };
    ( $lexeme:pat ) => {
        Token {
            lexeme: $lexeme,
            ..
        }
    };
}
40
41impl<'a> Parser<'a> {
42 pub(crate) fn new(port_data: &'a mut PortData, port_info: &'a PortInfo, span: Span) -> Self {
43 Parser {
44 lookahead: Vec::new(),
45 lexer: Lexer::new(port_data, port_info, span),
46 }
47 }
48}
49
50impl Parser<'_> {
51 #[maybe_async]
52 fn next_token(&mut self) -> Result<Option<Token>, LexerError> {
53 if let Some(next) = self.lookahead.pop() {
54 Ok(Some(next))
55 } else {
56 maybe_await!(self.lexer.next_token())
57 }
58 }
59
60 pub(crate) fn curr_span(&self) -> Span {
61 self.lexer.curr_span()
62 }
63
64 fn return_token(&mut self, token: Token) {
65 self.lookahead.push(token)
66 }
67
68 #[cfg(feature = "async")]
69 pub fn expression(&mut self) -> BoxFuture<'_, Result<Option<Syntax>, ParseSyntaxError>> {
70 Box::pin(self.expression_inner())
71 }
72
73 #[cfg(not(feature = "async"))]
74 pub fn expression(&mut self) -> Result<Option<Syntax>, ParseSyntaxError> {
75 self.expression_inner()
76 }
77
78 #[maybe_async]
79 fn expression_inner(&mut self) -> Result<Option<Syntax>, ParseSyntaxError> {
80 match maybe_await!(self.next_token())?.ok_or(ParseSyntaxError::UnexpectedEof)? {
81 token!(Lexeme::Boolean(b), span) => Ok(Some(Syntax::new_wrapped(Value::from(b), span))),
83 token!(Lexeme::Character(Character::Literal(c)), span) => {
84 Ok(Some(Syntax::new_wrapped(Value::from(c), span)))
85 }
86 token!(Lexeme::Character(Character::Escaped(e)), span) => {
87 Ok(Some(Syntax::new_wrapped(Value::from(char::from(e)), span)))
88 }
89 token!(Lexeme::Character(Character::Unicode(u)), span) => {
90 Ok(Some(Syntax::new_wrapped(
91 Value::from(char::try_from(u32::from_str_radix(&u, 16).unwrap())?),
92 span,
93 )))
94 }
95 token!(Lexeme::String(s), span) => Ok(Some(Syntax::new_wrapped(Value::from(s), span))),
96 token!(Lexeme::Number(n), span) => Ok(Some(Syntax::new_wrapped(
97 Value::from(Number::try_from(n)?),
98 span,
99 ))),
100
101 token!(Lexeme::Identifier(ident), span) => {
103 Ok(Some(Syntax::new_identifier(&ident, span)))
104 }
105
106 token!(Lexeme::LParen, span) => {
108 Ok(Some(maybe_await!(self.list(span, Lexeme::RParen))?))
109 }
110 token!(Lexeme::LBracket, span) => {
111 Ok(Some(maybe_await!(self.list(span, Lexeme::RBracket))?))
112 }
113
114 token!(Lexeme::HashParen, span) => Ok(Some(maybe_await!(self.vector(span))?)),
116 token!(Lexeme::Vu8Paren, span) => Ok(Some(maybe_await!(self.byte_vector(span))?)),
117
118 token!(Lexeme::Quote, span) => Ok(Some(maybe_await!(self.alias("quote", span))?)),
120 token!(Lexeme::Backquote, span) => {
121 Ok(Some(maybe_await!(self.alias("quasiquote", span))?))
122 }
123 token!(Lexeme::Comma, span) => Ok(Some(maybe_await!(self.alias("unquote", span))?)),
124 token!(Lexeme::CommaAt, span) => {
125 Ok(Some(maybe_await!(self.alias("unquote-splicing", span))?))
126 }
127 token!(Lexeme::HashQuote, span) => Ok(Some(maybe_await!(self.alias("syntax", span))?)),
128 token!(Lexeme::HashBackquote, span) => {
129 Ok(Some(maybe_await!(self.alias("quasisyntax", span))?))
130 }
131 token!(Lexeme::HashComma, span) => {
132 Ok(Some(maybe_await!(self.alias("unsyntax", span))?))
133 }
134 token!(Lexeme::HashCommaAt, span) => {
135 Ok(Some(maybe_await!(self.alias("unsyntax-splicing", span))?))
136 }
137
138 token!(Lexeme::DatumComment) => {
140 let _ = maybe_await!(self.expression())?;
142 Ok(None)
143 }
144
145 token!(Lexeme::RParen, span) | token!(Lexeme::RBracket, span) => {
147 Err(ParseSyntaxError::UnexpectedClosingParen { span })
148 }
149
150 token!(Lexeme::Period, span) => Err(ParseSyntaxError::InvalidPeriodLocation { span }),
151 }
152 }
153
154 #[maybe_async]
155 pub fn get_sexpr(&mut self) -> Result<Syntax, ParseSyntaxError> {
156 loop {
157 if let Some(expr) = maybe_await!(self.expression())? {
158 return Ok(expr);
159 }
160 }
161 }
162
163 #[maybe_async]
164 pub fn get_sexpr_or_eof(&mut self) -> Result<Option<Syntax>, ParseSyntaxError> {
165 loop {
166 match maybe_await!(self.next_token()) {
168 Ok(None) => return Ok(None),
169 Err(err) => return Err(ParseSyntaxError::Lex(err)),
170 Ok(Some(token)) => self.return_token(token),
171 }
172
173 if let Some(expr) = maybe_await!(self.expression())? {
174 return Ok(Some(expr));
175 }
176 }
177 }
178
179 #[maybe_async]
180 pub fn all_sexprs(&mut self) -> Result<Syntax, ParseSyntaxError> {
181 let start_span = self.lexer.curr_span();
182 let mut sexprs = Vec::new();
183 loop {
184 match maybe_await!(self.next_token()) {
186 Ok(None) => {
187 let end_span = self.lexer.curr_span();
188 sexprs.push(Syntax::new_wrapped(Value::null(), end_span));
189 return Ok(Syntax::List {
190 list: sexprs,
191 span: start_span,
192 });
193 }
194 Err(err) => return Err(ParseSyntaxError::Lex(err)),
195 Ok(Some(token)) => self.return_token(token),
196 }
197
198 sexprs.push(maybe_await!(self.get_sexpr())?);
199 }
200 }
201
202 #[maybe_async]
203 fn list(&mut self, span: Span, closing: Lexeme) -> Result<Syntax, ParseSyntaxError> {
204 match maybe_await!(self.next_token())?.ok_or(ParseSyntaxError::UnexpectedEof)? {
205 token!(Lexeme::Period) => return maybe_await!(self.get_sexpr()),
208 token if token.lexeme == closing => {
211 return Ok(Syntax::new_wrapped(Value::null(), token.span));
212 }
213 token => {
215 self.return_token(token);
216 }
217 }
218
219 let mut output = Vec::new();
220 loop {
221 if let Some(expr) = maybe_await!(self.expression())? {
222 output.push(expr);
223 }
224 match maybe_await!(self.next_token())?.ok_or(ParseSyntaxError::UnexpectedEof)? {
225 token if token.lexeme == closing => {
226 output.push(Syntax::new_wrapped(Value::null(), token.span));
227 return Ok(Syntax::new_list(output, span));
228 }
229 token!(Lexeme::Period) => {
230 let peek1 =
231 maybe_await!(self.next_token())?.ok_or(ParseSyntaxError::UnexpectedEof)?;
232 let peek2 =
233 maybe_await!(self.next_token())?.ok_or(ParseSyntaxError::UnexpectedEof)?;
234 match (peek1, peek2) {
235 (token!(Lexeme::LParen, end_span), token!(Lexeme::RParen))
237 | (token!(Lexeme::LBracket, end_span), token!(Lexeme::RBracket)) => {
238 output.push(Syntax::new_wrapped(Value::null(), end_span));
239 return Ok(Syntax::new_list(output, span));
240 }
241 (peek1, peek2) => {
243 self.return_token(peek2);
244 self.return_token(peek1);
245 }
246 }
247 output.push(maybe_await!(self.get_sexpr())?);
248 let last =
249 maybe_await!(self.next_token())?.ok_or(ParseSyntaxError::UnexpectedEof)?;
250 if last.lexeme == closing {
251 return Ok(Syntax::new_list(output, span));
252 } else {
253 return Err(ParseSyntaxError::ExpectedClosingParen { span: last.span });
254 }
255 }
256 token => self.return_token(token),
257 }
258 }
259 }
260
261 #[maybe_async]
262 fn vector(&mut self, span: Span) -> Result<Syntax, ParseSyntaxError> {
263 let mut output = Vec::new();
264 loop {
265 match maybe_await!(self.next_token())?.ok_or(ParseSyntaxError::UnexpectedEof)? {
266 token!(Lexeme::RParen) => return Ok(Syntax::new_vector(output, span)),
267 token => {
268 self.return_token(token);
269 if let Some(expr) = maybe_await!(self.expression())? {
270 output.push(expr);
271 }
272 }
273 }
274 }
275 }
276
277 #[maybe_async]
278 fn byte_vector(&mut self, span: Span) -> Result<Syntax, ParseSyntaxError> {
279 let mut output = Vec::new();
280 loop {
281 match maybe_await!(self.next_token())?.ok_or(ParseSyntaxError::UnexpectedEof)? {
282 token!(Lexeme::Number(num), span) => {
283 let num: Number = num.try_into()?;
284 if let Some(simple) = num.as_simple()
285 && let Ok(byte) = u8::try_from(simple)
286 {
287 output.push(byte);
288 continue;
289 }
290 return Err(ParseSyntaxError::NonByte { span });
291 }
292 token!(Lexeme::RParen) => {
293 return Ok(Syntax::new_wrapped(Value::from(output), span));
294 }
295 token => {
296 return Err(ParseSyntaxError::NonByte { span: token.span });
297 }
298 }
299 }
300 }
301
302 #[maybe_async]
303 fn alias(&mut self, alias: &str, span: Span) -> Result<Syntax, ParseSyntaxError> {
304 let expr = maybe_await!(self.get_sexpr())?;
305 let expr_span = expr.span().clone();
306 Ok(Syntax::new_list(
307 vec![
308 Syntax::new_identifier(alias, span.clone()),
309 expr,
310 Syntax::new_wrapped(Value::null(), expr_span),
311 ],
312 span,
313 ))
314 }
315}
316
317#[derive(Debug)]
318pub enum ParseSyntaxError {
319 UnexpectedEof,
320 ExpectedClosingParen { span: Span },
321 UnexpectedClosingParen { span: Span },
322 InvalidPeriodLocation { span: Span },
323 NonByte { span: Span },
324 UnclosedParen { span: Span },
325 CharTryFrom(CharTryFromError),
326 Lex(LexerError),
327 ParseNumberError(ParseNumberError),
328 UnexpectedToken { token: Box<Token> },
329}
330
331impl fmt::Display for ParseSyntaxError {
332 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
333 match self {
334 Self::UnexpectedEof => write!(f, "unexpected end of file"),
336 Self::ExpectedClosingParen { span } => {
337 write!(f, "closing parenthesis not found at `{span}`")
338 }
339 Self::UnexpectedClosingParen { span } => {
340 write!(f, "unexpected closing parenthesis found at `{span}`")
341 }
342 Self::InvalidPeriodLocation { span } => {
343 write!(f, "invalid period found at location `{span}`")
344 }
345 Self::NonByte { span } => write!(
346 f,
347 "non byte value found in byte vector at location `{span}`",
348 ),
349 Self::UnclosedParen { span } => {
350 write!(f, "unclosed parenthesis at location `{span}`")
351 }
352 Self::CharTryFrom(e) => write!(f, "{e}"),
353 Self::Lex(e) => write!(f, "{e:?}"),
354 Self::ParseNumberError(e) => write!(f, "{e:?}"),
355 Self::UnexpectedToken { token } => {
356 write!(
357 f,
358 "unexpected token {:?} at location `{}`",
359 token.lexeme, token.span
360 )
361 }
362 }
363 }
364}
365impl StdError for ParseSyntaxError {}
366
367impl From<LexerError> for ParseSyntaxError {
368 fn from(lex: LexerError) -> Self {
369 Self::Lex(lex)
370 }
371}
372
373impl From<CharTryFromError> for ParseSyntaxError {
374 fn from(e: CharTryFromError) -> Self {
375 Self::CharTryFrom(e)
376 }
377}
378
379impl From<ParseNumberError> for ParseSyntaxError {
380 fn from(e: ParseNumberError) -> Self {
381 Self::ParseNumberError(e)
382 }
383}