rust_lisp/
parser.rs

1use crate::{
2    lisp,
3    model::{FloatType, IntType, List, Symbol, Value},
4};
5
6use std::fmt::Display;
7
8/// Parse a string of Lisp code into a series of s-expressions. There
9/// are more than one expressions when the base string has more than one
10/// independent parenthesized lists at its root.
11pub fn parse(code: &str) -> impl Iterator<Item = Result<Value, ParseError>> + '_ {
12    let mut index = 0;
13    index = consume_whitespace_and_comments(code, index);
14
15    std::iter::from_fn(move || {
16        if let Some(res) = parse_expression(code, index) {
17            if let Ok(res) = res {
18                index = res.index;
19                index = consume_whitespace_and_comments(code, index);
20
21                Some(Ok(res.parsed.into_value()))
22            } else {
23                Some(Err(res.unwrap_err()))
24            }
25        } else {
26            None // TODO: Err if we don't parse the whole input?
27        }
28    })
29}
30
31/// A slightly more convenient data structure for building the parse tree, before
32/// eventually converting it into proper s-expressions.
33#[derive(Debug, Clone)]
34enum ParseTree {
35    Atom(Value),
36    List(Vec<ParseTree>),
37    Quoted(Box<ParseTree>),
38    Comma(Box<ParseTree>),
39}
40
41impl ParseTree {
42    pub fn into_value(self) -> Value {
43        match self {
44            ParseTree::Atom(value) => value,
45            ParseTree::List(vec) => Value::List(
46                vec.into_iter()
47                    .map(|parse_tree| parse_tree.into_value())
48                    .collect::<List>(),
49            ),
50            ParseTree::Quoted(inner) => lisp! { (quote {inner.into_value()}) },
51            ParseTree::Comma(inner) => lisp! { (comma {inner.into_value()}) },
52        }
53    }
54}
55
56/**
57 * An error that occurred while parsing a string as lisp code
58 */
59#[derive(Debug, Clone, PartialEq, Eq)]
60pub struct ParseError {
61    pub msg: String,
62    // pub line: i32,
63}
64
65impl Display for ParseError {
66    fn fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
67        return write!(formatter, "Parse error: {}", self.msg);
68    }
69}
70
71#[derive(Clone, Debug)]
72struct ParsedAndIndex {
73    pub parsed: ParseTree,
74    pub index: usize,
75}
76
77type ParseResult = Option<Result<ParsedAndIndex, ParseError>>;
78type ConsumeResult = Option<usize>;
79
80fn parse_expression(code: &str, index: usize) -> ParseResult {
81    for func in [parse_list, parse_atom] {
82        let res = func(code, index);
83
84        if res.is_some() {
85            return res;
86        }
87    }
88
89    None
90}
91
92fn parse_list(code: &str, index: usize) -> ParseResult {
93    let mut index = consume(code, index, "(")?;
94    let mut members = vec![];
95
96    index = consume_whitespace_and_comments(code, index);
97
98    while let Some(res) = parse_expression(code, index) {
99        if let Ok(res) = res {
100            index = res.index;
101            members.push(res.parsed);
102            index = consume_whitespace_and_comments(code, index);
103        } else {
104            return Some(res);
105        }
106    }
107
108    if let Some(index) = consume(code, index, ")") {
109        Some(Ok(ParsedAndIndex {
110            parsed: ParseTree::List(members),
111            index,
112        }))
113    } else {
114        Some(Err(ParseError {
115            msg: format!("Unclosed list at index {}", index),
116        }))
117    }
118}
119
120fn parse_atom(code: &str, index: usize) -> ParseResult {
121    for func in [
122        parse_quoted,
123        parse_comma,
124        parse_nil,
125        parse_false,
126        parse_true,
127        parse_number,
128        parse_string,
129        parse_symbol,
130    ] {
131        let res = func(code, index);
132
133        if res.is_some() {
134            return res;
135        }
136    }
137
138    None
139}
140
141fn parse_quoted(code: &str, index: usize) -> ParseResult {
142    let index = consume(code, index, "'")?;
143    let res = parse_expression(code, index)?;
144
145    if let Ok(ParsedAndIndex { parsed, index }) = res {
146        Some(Ok(ParsedAndIndex {
147            parsed: ParseTree::Quoted(Box::new(parsed)),
148            index,
149        }))
150    } else {
151        Some(res)
152    }
153}
154
155fn parse_comma(code: &str, index: usize) -> ParseResult {
156    let index = consume(code, index, ",")?;
157    let res = parse_expression(code, index)?;
158
159    if let Ok(ParsedAndIndex { parsed, index }) = res {
160        Some(Ok(ParsedAndIndex {
161            parsed: ParseTree::Comma(Box::new(parsed)),
162            index,
163        }))
164    } else {
165        Some(res)
166    }
167}
168
169fn parse_nil(code: &str, index: usize) -> ParseResult {
170    let index = consume(code, index, "nil")?;
171
172    if next_char_is_break(code, index) {
173        Some(Ok(ParsedAndIndex {
174            parsed: ParseTree::Atom(Value::NIL),
175            index,
176        }))
177    } else {
178        None
179    }
180}
181
182fn parse_false(code: &str, index: usize) -> ParseResult {
183    let index = consume(code, index, "f")?;
184
185    if next_char_is_break(code, index) {
186        Some(Ok(ParsedAndIndex {
187            parsed: ParseTree::Atom(Value::False),
188            index,
189        }))
190    } else {
191        None
192    }
193}
194
195fn parse_true(code: &str, index: usize) -> ParseResult {
196    let index = consume(code, index, "t")?;
197
198    if next_char_is_break(code, index) {
199        Some(Ok(ParsedAndIndex {
200            parsed: ParseTree::Atom(Value::True),
201            index,
202        }))
203    } else {
204        None
205    }
206}
207
208fn parse_number(code: &str, index: usize) -> ParseResult {
209    let (front_last_index, front_last_char) = consume_while(code, index, |(index, ch)| {
210        (index == 0 && ch == '-') || ch.is_numeric()
211    })?;
212
213    if front_last_char.is_numeric() {
214        let front_last_index = front_last_index + 1;
215
216        let back_end = consume_while(code, front_last_index, |(index, ch)| {
217            (index == 0 && ch == '.') || ch.is_numeric()
218        });
219
220        if let Some((back_last_index, _)) = back_end {
221            let back_last_index = back_last_index + 1;
222
223            if back_last_index >= front_last_index + 2 {
224                if next_char_is_break(code, back_last_index) {
225                    if let Ok(float) = code
226                        .get(index..back_last_index)
227                        .unwrap_or("")
228                        .parse::<FloatType>()
229                    {
230                        return Some(Ok(ParsedAndIndex {
231                            parsed: ParseTree::Atom(Value::Float(float)),
232                            index: back_last_index,
233                        }));
234                    }
235                }
236            } else if code.as_bytes().get(back_last_index - 1) == Some(&b'.') {
237                return Some(Err(ParseError {
238                    msg: format!(
239                        "Expected decimal value after '.' at index {}",
240                        back_last_index - 1
241                    ),
242                }));
243            }
244        }
245
246        if next_char_is_break(code, front_last_index) {
247            if let Ok(int) = code
248                .get(index..front_last_index)
249                .unwrap_or("")
250                .parse::<IntType>()
251            {
252                return Some(Ok(ParsedAndIndex {
253                    parsed: ParseTree::Atom(Value::Int(int)),
254                    index: front_last_index,
255                }));
256            }
257        }
258    }
259
260    None
261}
262
263fn parse_string(code: &str, index: usize) -> ParseResult {
264    let (last_index, _) = consume_while(code, index, |(index, ch)| {
265        (index == 0 && ch == '"') || (index > 0 && ch != '"')
266    })?;
267
268    if last_index > index {
269        if code.as_bytes().get(last_index + 1) == Some(&b'"') {
270            Some(Ok(ParsedAndIndex {
271                parsed: ParseTree::Atom(Value::String(
272                    code.get(index + 1..last_index + 1).unwrap_or("").to_owned(),
273                )),
274                index: last_index + 2,
275            }))
276        } else {
277            Some(Err(ParseError {
278                msg: format!("Unclosed string at index {}", last_index),
279            }))
280        }
281    } else {
282        None
283    }
284}
285
286fn parse_symbol(code: &str, index: usize) -> ParseResult {
287    let (last_index, _) = consume_while(code, index, |(index, ch)| {
288        (index == 0 && is_symbol_start(ch)) || (index > 0 && is_symbolic(ch))
289    })?;
290    let last_index = last_index + 1;
291
292    if last_index > index {
293        Some(Ok(ParsedAndIndex {
294            parsed: ParseTree::Atom(Value::Symbol(Symbol(
295                code.get(index..last_index).unwrap_or("").to_owned(),
296            ))),
297            index: last_index,
298        }))
299    } else {
300        None
301    }
302}
303
304fn consume(code: &str, index: usize, s: &str) -> ConsumeResult {
305    let slice = code.get(index..).unwrap_or("");
306
307    if slice.len() >= s.len()
308        && slice
309            .chars()
310            .zip(s.chars())
311            .all(|(a, b)| a.to_ascii_lowercase() == b.to_ascii_lowercase())
312    {
313        return Some(index + s.len());
314    } else {
315        return None;
316    }
317}
318
319fn consume_whitespace_and_comments(code: &str, index: usize) -> usize {
320    let mut semicolons = 0;
321
322    consume_while(code, index, move |(_, ch)| {
323        if ch == ';' {
324            semicolons += 1;
325        } else if semicolons < 2 || ch == '\n' {
326            semicolons = 0;
327        }
328
329        return ch.is_whitespace() || ch == ';' || semicolons >= 2;
330    })
331    .map(|(index, _)| index + 1)
332    .unwrap_or(index)
333}
334
335fn consume_while<F: FnMut((usize, char)) -> bool>(
336    code: &str,
337    index: usize,
338    mut pred: F,
339) -> Option<(usize, char)> {
340    code.get(index..)
341        .unwrap_or("")
342        .char_indices()
343        .take_while(|(i, c)| pred((*i, *c)))
344        .last()
345        .map(|(last_index, ch)| (last_index + index, ch))
346}
347
348fn is_symbol_start(c: char) -> bool {
349    !c.is_numeric() && is_symbolic(c)
350}
351
352fn is_symbolic(c: char) -> bool {
353    !c.is_whitespace() && !SPECIAL_TOKENS.iter().any(|t| *t == c)
354}
355
356fn next_char_is_break(code: &str, index: usize) -> bool {
357    code.get(index..)
358        .map(|s| s.chars().next())
359        .flatten()
360        .map(|ch| ch.is_whitespace() || SPECIAL_TOKENS.iter().any(|t| *t == ch))
361        .unwrap_or(true)
362}
363
364const SPECIAL_TOKENS: [char; 5] = ['(', ')', '\'', ',', ';'];