scheme_rs/
parse.rs

1use crate::{
2    ast::Literal,
3    lex::{Fragment, InputSpan, Lexeme, Token},
4    num::Number,
5    syntax::Syntax,
6};
7use rug::Integer;
8
9#[derive(Debug)]
10pub enum ParseError<'a> {
11    EmptyInput,
12    UnexpectedEndOfFile,
13    ExpectedClosingParen { span: InputSpan<'a> },
14    ParseNumberError { value: String, span: InputSpan<'a> },
15    InvalidHexValue { value: String, span: InputSpan<'a> },
16    InvalidDocCommentLocation { span: InputSpan<'a> },
17    InvalidPeriodLocation { span: InputSpan<'a> },
18    UnclosedParen { span: InputSpan<'a> },
19    DocCommentMustPrecedeDefine,
20}
21
22impl<'a> ParseError<'a> {
23    fn invalid_period(token: &Token<'a>) -> Self {
24        Self::InvalidPeriodLocation {
25            span: token.span.clone(),
26        }
27    }
28
29    fn invalid_doc_comment(token: &Token<'a>) -> Self {
30        Self::InvalidDocCommentLocation {
31            span: token.span.clone(),
32        }
33    }
34
35    fn unclosed_paren(token: &Token<'a>) -> Self {
36        Self::UnclosedParen {
37            span: token.span.clone(),
38        }
39    }
40}
41
/// Expands to a `Token { lexeme: <pattern>, .. }` pattern so that match arms
/// can test a token's lexeme while ignoring its remaining fields.
macro_rules! token {
    ($lexeme:pat) => {
        Token { lexeme: $lexeme, .. }
    };
}
50
51pub fn expression<'a, 'b>(i: &'b [Token<'a>]) -> Result<(&'b [Token<'a>], Syntax), ParseError<'a>> {
52    match i {
53        // Calling expression with an empty list is an error
54        [] => Err(ParseError::EmptyInput),
55        // Literals:
56        [b @ token!(Lexeme::Boolean(_)), tail @ ..] => {
57            Ok((tail, Syntax::new_literal(boolean(b)?, b.span.clone())))
58        }
59        [n @ token!(Lexeme::Number(_)), tail @ ..] => {
60            Ok((tail, Syntax::new_literal(number(n)?, n.span.clone())))
61        }
62        [s @ token!(Lexeme::String(_)), tail @ ..] => {
63            Ok((tail, Syntax::new_literal(string(s)?, s.span.clone())))
64        }
65        // Identifiers:
66        [i @ token!(Lexeme::Identifier(_)), tail @ ..] => Ok((
67            tail,
68            Syntax::new_identifier(i.lexeme.to_ident(), i.span.clone()),
69        )),
70        // Lists:
71        [n @ token!(Lexeme::LParen), token!(Lexeme::RParen), tail @ ..] => {
72            Ok((tail, Syntax::new_null(n.span.clone())))
73        }
74        [n @ token!(Lexeme::LBracket), token!(Lexeme::RBracket), tail @ ..] => {
75            Ok((tail, Syntax::new_null(n.span.clone())))
76        }
77        [p @ token!(Lexeme::LParen), tail @ ..] => match list(tail, p.span.clone(), Lexeme::RParen)
78        {
79            Err(ParseListError::UnclosedParen) => Err(ParseError::unclosed_paren(p)),
80            Err(ParseListError::ParseError(err)) => Err(err),
81            Ok(ok) => Ok(ok),
82        },
83        [p @ token!(Lexeme::LBracket), tail @ ..] => {
84            match list(tail, p.span.clone(), Lexeme::RBracket) {
85                Err(ParseListError::UnclosedParen) => Err(ParseError::unclosed_paren(p)),
86                Err(ParseListError::ParseError(err)) => Err(err),
87                Ok(ok) => Ok(ok),
88            }
89        }
90        // Vectors:
91        [v @ token!(Lexeme::HashParen), tail @ ..] => match vector(tail, v.span.clone()) {
92            Err(ParseVectorError::UnclosedParen) => Err(ParseError::unclosed_paren(v)),
93            Err(ParseVectorError::ParseError(err)) => Err(err),
94            Ok(ok) => Ok(ok),
95        },
96        // Quote:
97        [q @ token!(Lexeme::Quote), tail @ ..] => {
98            let (tail, expr) = expression(tail)?;
99            let expr_span = expr.span().clone();
100            Ok((
101                tail,
102                Syntax::new_list(
103                    vec![
104                        Syntax::new_identifier("quote", q.span.clone()),
105                        expr,
106                        Syntax::new_null(expr_span),
107                    ],
108                    q.span.clone(),
109                ),
110            ))
111        }
112        // Syntax:
113        [s @ token!(Lexeme::HashTick), tail @ ..] => {
114            let (tail, expr) = expression(tail)?;
115            let expr_span = expr.span().clone();
116            Ok((
117                tail,
118                Syntax::new_list(
119                    vec![
120                        Syntax::new_identifier("syntax", s.span.clone()),
121                        expr,
122                        Syntax::new_null(expr_span),
123                    ],
124                    s.span.clone(),
125                ),
126            ))
127        }
128        // Invalid locations:
129        [d @ token!(Lexeme::Period), ..] => Err(ParseError::invalid_period(d)),
130        [d @ token!(Lexeme::DocComment(_)), ..] => Err(ParseError::invalid_doc_comment(d)),
131        x => todo!("Not implemented: {x:#?}"),
132    }
133}
134
135#[derive(Debug)]
136enum ParseListError<'a> {
137    UnclosedParen,
138    ParseError(ParseError<'a>),
139}
140
141impl<'a> From<ParseError<'a>> for ParseListError<'a> {
142    fn from(pe: ParseError<'a>) -> Self {
143        Self::ParseError(pe)
144    }
145}
146
147fn list<'a, 'b>(
148    mut i: &'b [Token<'a>],
149    span: InputSpan<'a>,
150    closing: Lexeme<'static>,
151) -> Result<(&'b [Token<'a>], Syntax), ParseListError<'a>> {
152    let mut output = Vec::new();
153    loop {
154        if i.is_empty() {
155            return Err(ParseListError::UnclosedParen);
156        }
157
158        let (remaining, expr) = expression(i)?;
159        output.push(expr);
160
161        match remaining {
162            // Proper lists:
163            [token, tail @ ..] if token.lexeme == closing => {
164                output.push(Syntax::new_null(token.span.clone()));
165                return Ok((tail, Syntax::new_list(output, span)));
166            }
167            [token!(Lexeme::Period), end @ token!(Lexeme::LParen), token!(Lexeme::RParen), token, tail @ ..]
168            | [token!(Lexeme::Period), end @ token!(Lexeme::LBracket), token!(Lexeme::RBracket), token, tail @ ..]
169                if token.lexeme == closing =>
170            {
171                output.push(Syntax::new_null(end.span.clone()));
172                return Ok((tail, Syntax::new_list(output, span)));
173            }
174            // Improper lists:
175            [token!(Lexeme::Period), tail @ ..] => {
176                let (remaining, expr) = expression(tail)?;
177                output.push(expr);
178                return match remaining {
179                    [] => Err(ParseListError::ParseError(ParseError::UnexpectedEndOfFile)),
180                    [token!(Lexeme::RParen), tail @ ..] => {
181                        Ok((tail, Syntax::new_list(output, span)))
182                    }
183                    [unexpected, ..] => Err(ParseListError::ParseError(
184                        ParseError::ExpectedClosingParen {
185                            span: unexpected.span.clone(),
186                        },
187                    )),
188                };
189            }
190            _ => (),
191        }
192        i = remaining;
193    }
194}
195
196#[derive(Debug)]
197enum ParseVectorError<'a> {
198    UnclosedParen,
199    ParseError(ParseError<'a>),
200}
201
202impl<'a> From<ParseError<'a>> for ParseVectorError<'a> {
203    fn from(pe: ParseError<'a>) -> Self {
204        Self::ParseError(pe)
205    }
206}
207
208fn vector<'a, 'b>(
209    mut i: &'b [Token<'a>],
210    span: InputSpan<'a>,
211) -> Result<(&'b [Token<'a>], Syntax), ParseVectorError<'a>> {
212    let mut output = Vec::new();
213    loop {
214        match i {
215            [] => return Err(ParseVectorError::UnclosedParen),
216            [token!(Lexeme::RParen), tail @ ..] => {
217                return Ok((tail, Syntax::new_vector(output, span)))
218            }
219            _ => (),
220        }
221
222        let (remaining, expr) = expression(i)?;
223        output.push(expr);
224        i = remaining;
225    }
226}
227
228fn boolean<'a>(i: &Token<'a>) -> Result<Literal, ParseError<'a>> {
229    Ok(Literal::Boolean(i.lexeme.to_boolean()))
230}
231
232fn number<'a>(i: &Token<'a>) -> Result<Literal, ParseError<'a>> {
233    let number = i.lexeme.to_number();
234    // TODO: Parse correctly
235    let number: Integer = number.parse().unwrap();
236    match number.to_i64() {
237        Some(fixed) => Ok(Literal::Number(Number::FixedInteger(fixed))),
238        None => Ok(Literal::Number(Number::BigInteger(number))),
239    }
240}
241
242fn string<'a>(i: &Token<'a>) -> Result<Literal, ParseError<'a>> {
243    let fragments = i.lexeme.to_string();
244    let mut output = String::new();
245    for fragment in fragments {
246        match fragment {
247            Fragment::Escaped(c) => output.push(*c),
248            Fragment::Unescaped(s) => output.push_str(s),
249            Fragment::HexValue(hex) => {
250                let Ok(hex_value) = u32::from_str_radix(hex, 16) else {
251                    return Err(ParseError::InvalidHexValue {
252                        value: hex.to_string(),
253                        span: i.span.clone(),
254                    });
255                };
256                let Some(c) = char::from_u32(hex_value) else {
257                    return Err(ParseError::InvalidHexValue {
258                        value: hex.to_string(),
259                        span: i.span.clone(),
260                    });
261                };
262                output.push(c);
263            }
264        }
265    }
266    Ok(Literal::String(output))
267}