1use crate::{
2 lisp,
3 model::{FloatType, IntType, List, Symbol, Value},
4};
5
6use std::fmt::Display;
7
8pub fn parse(code: &str) -> impl Iterator<Item = Result<Value, ParseError>> + '_ {
12 let mut index = 0;
13 index = consume_whitespace_and_comments(code, index);
14
15 std::iter::from_fn(move || {
16 if let Some(res) = parse_expression(code, index) {
17 if let Ok(res) = res {
18 index = res.index;
19 index = consume_whitespace_and_comments(code, index);
20
21 Some(Ok(res.parsed.into_value()))
22 } else {
23 Some(Err(res.unwrap_err()))
24 }
25 } else {
26 None }
28 })
29}
30
31#[derive(Debug, Clone)]
34enum ParseTree {
35 Atom(Value),
36 List(Vec<ParseTree>),
37 Quoted(Box<ParseTree>),
38 Comma(Box<ParseTree>),
39}
40
41impl ParseTree {
42 pub fn into_value(self) -> Value {
43 match self {
44 ParseTree::Atom(value) => value,
45 ParseTree::List(vec) => Value::List(
46 vec.into_iter()
47 .map(|parse_tree| parse_tree.into_value())
48 .collect::<List>(),
49 ),
50 ParseTree::Quoted(inner) => lisp! { (quote {inner.into_value()}) },
51 ParseTree::Comma(inner) => lisp! { (comma {inner.into_value()}) },
52 }
53 }
54}
55
56#[derive(Debug, Clone, PartialEq, Eq)]
60pub struct ParseError {
61 pub msg: String,
62 }
64
65impl Display for ParseError {
66 fn fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
67 return write!(formatter, "Parse error: {}", self.msg);
68 }
69}
70
71#[derive(Clone, Debug)]
72struct ParsedAndIndex {
73 pub parsed: ParseTree,
74 pub index: usize,
75}
76
77type ParseResult = Option<Result<ParsedAndIndex, ParseError>>;
78type ConsumeResult = Option<usize>;
79
80fn parse_expression(code: &str, index: usize) -> ParseResult {
81 for func in [parse_list, parse_atom] {
82 let res = func(code, index);
83
84 if res.is_some() {
85 return res;
86 }
87 }
88
89 None
90}
91
92fn parse_list(code: &str, index: usize) -> ParseResult {
93 let mut index = consume(code, index, "(")?;
94 let mut members = vec![];
95
96 index = consume_whitespace_and_comments(code, index);
97
98 while let Some(res) = parse_expression(code, index) {
99 if let Ok(res) = res {
100 index = res.index;
101 members.push(res.parsed);
102 index = consume_whitespace_and_comments(code, index);
103 } else {
104 return Some(res);
105 }
106 }
107
108 if let Some(index) = consume(code, index, ")") {
109 Some(Ok(ParsedAndIndex {
110 parsed: ParseTree::List(members),
111 index,
112 }))
113 } else {
114 Some(Err(ParseError {
115 msg: format!("Unclosed list at index {}", index),
116 }))
117 }
118}
119
120fn parse_atom(code: &str, index: usize) -> ParseResult {
121 for func in [
122 parse_quoted,
123 parse_comma,
124 parse_nil,
125 parse_false,
126 parse_true,
127 parse_number,
128 parse_string,
129 parse_symbol,
130 ] {
131 let res = func(code, index);
132
133 if res.is_some() {
134 return res;
135 }
136 }
137
138 None
139}
140
141fn parse_quoted(code: &str, index: usize) -> ParseResult {
142 let index = consume(code, index, "'")?;
143 let res = parse_expression(code, index)?;
144
145 if let Ok(ParsedAndIndex { parsed, index }) = res {
146 Some(Ok(ParsedAndIndex {
147 parsed: ParseTree::Quoted(Box::new(parsed)),
148 index,
149 }))
150 } else {
151 Some(res)
152 }
153}
154
155fn parse_comma(code: &str, index: usize) -> ParseResult {
156 let index = consume(code, index, ",")?;
157 let res = parse_expression(code, index)?;
158
159 if let Ok(ParsedAndIndex { parsed, index }) = res {
160 Some(Ok(ParsedAndIndex {
161 parsed: ParseTree::Comma(Box::new(parsed)),
162 index,
163 }))
164 } else {
165 Some(res)
166 }
167}
168
169fn parse_nil(code: &str, index: usize) -> ParseResult {
170 let index = consume(code, index, "nil")?;
171
172 if next_char_is_break(code, index) {
173 Some(Ok(ParsedAndIndex {
174 parsed: ParseTree::Atom(Value::NIL),
175 index,
176 }))
177 } else {
178 None
179 }
180}
181
182fn parse_false(code: &str, index: usize) -> ParseResult {
183 let index = consume(code, index, "f")?;
184
185 if next_char_is_break(code, index) {
186 Some(Ok(ParsedAndIndex {
187 parsed: ParseTree::Atom(Value::False),
188 index,
189 }))
190 } else {
191 None
192 }
193}
194
195fn parse_true(code: &str, index: usize) -> ParseResult {
196 let index = consume(code, index, "t")?;
197
198 if next_char_is_break(code, index) {
199 Some(Ok(ParsedAndIndex {
200 parsed: ParseTree::Atom(Value::True),
201 index,
202 }))
203 } else {
204 None
205 }
206}
207
208fn parse_number(code: &str, index: usize) -> ParseResult {
209 let (front_last_index, front_last_char) = consume_while(code, index, |(index, ch)| {
210 (index == 0 && ch == '-') || ch.is_numeric()
211 })?;
212
213 if front_last_char.is_numeric() {
214 let front_last_index = front_last_index + 1;
215
216 let back_end = consume_while(code, front_last_index, |(index, ch)| {
217 (index == 0 && ch == '.') || ch.is_numeric()
218 });
219
220 if let Some((back_last_index, _)) = back_end {
221 let back_last_index = back_last_index + 1;
222
223 if back_last_index >= front_last_index + 2 {
224 if next_char_is_break(code, back_last_index) {
225 if let Ok(float) = code
226 .get(index..back_last_index)
227 .unwrap_or("")
228 .parse::<FloatType>()
229 {
230 return Some(Ok(ParsedAndIndex {
231 parsed: ParseTree::Atom(Value::Float(float)),
232 index: back_last_index,
233 }));
234 }
235 }
236 } else if code.as_bytes().get(back_last_index - 1) == Some(&b'.') {
237 return Some(Err(ParseError {
238 msg: format!(
239 "Expected decimal value after '.' at index {}",
240 back_last_index - 1
241 ),
242 }));
243 }
244 }
245
246 if next_char_is_break(code, front_last_index) {
247 if let Ok(int) = code
248 .get(index..front_last_index)
249 .unwrap_or("")
250 .parse::<IntType>()
251 {
252 return Some(Ok(ParsedAndIndex {
253 parsed: ParseTree::Atom(Value::Int(int)),
254 index: front_last_index,
255 }));
256 }
257 }
258 }
259
260 None
261}
262
263fn parse_string(code: &str, index: usize) -> ParseResult {
264 let (last_index, _) = consume_while(code, index, |(index, ch)| {
265 (index == 0 && ch == '"') || (index > 0 && ch != '"')
266 })?;
267
268 if last_index > index {
269 if code.as_bytes().get(last_index + 1) == Some(&b'"') {
270 Some(Ok(ParsedAndIndex {
271 parsed: ParseTree::Atom(Value::String(
272 code.get(index + 1..last_index + 1).unwrap_or("").to_owned(),
273 )),
274 index: last_index + 2,
275 }))
276 } else {
277 Some(Err(ParseError {
278 msg: format!("Unclosed string at index {}", last_index),
279 }))
280 }
281 } else {
282 None
283 }
284}
285
286fn parse_symbol(code: &str, index: usize) -> ParseResult {
287 let (last_index, _) = consume_while(code, index, |(index, ch)| {
288 (index == 0 && is_symbol_start(ch)) || (index > 0 && is_symbolic(ch))
289 })?;
290 let last_index = last_index + 1;
291
292 if last_index > index {
293 Some(Ok(ParsedAndIndex {
294 parsed: ParseTree::Atom(Value::Symbol(Symbol(
295 code.get(index..last_index).unwrap_or("").to_owned(),
296 ))),
297 index: last_index,
298 }))
299 } else {
300 None
301 }
302}
303
304fn consume(code: &str, index: usize, s: &str) -> ConsumeResult {
305 let slice = code.get(index..).unwrap_or("");
306
307 if slice.len() >= s.len()
308 && slice
309 .chars()
310 .zip(s.chars())
311 .all(|(a, b)| a.to_ascii_lowercase() == b.to_ascii_lowercase())
312 {
313 return Some(index + s.len());
314 } else {
315 return None;
316 }
317}
318
319fn consume_whitespace_and_comments(code: &str, index: usize) -> usize {
320 let mut semicolons = 0;
321
322 consume_while(code, index, move |(_, ch)| {
323 if ch == ';' {
324 semicolons += 1;
325 } else if semicolons < 2 || ch == '\n' {
326 semicolons = 0;
327 }
328
329 return ch.is_whitespace() || ch == ';' || semicolons >= 2;
330 })
331 .map(|(index, _)| index + 1)
332 .unwrap_or(index)
333}
334
335fn consume_while<F: FnMut((usize, char)) -> bool>(
336 code: &str,
337 index: usize,
338 mut pred: F,
339) -> Option<(usize, char)> {
340 code.get(index..)
341 .unwrap_or("")
342 .char_indices()
343 .take_while(|(i, c)| pred((*i, *c)))
344 .last()
345 .map(|(last_index, ch)| (last_index + index, ch))
346}
347
348fn is_symbol_start(c: char) -> bool {
349 !c.is_numeric() && is_symbolic(c)
350}
351
352fn is_symbolic(c: char) -> bool {
353 !c.is_whitespace() && !SPECIAL_TOKENS.iter().any(|t| *t == c)
354}
355
356fn next_char_is_break(code: &str, index: usize) -> bool {
357 code.get(index..)
358 .map(|s| s.chars().next())
359 .flatten()
360 .map(|ch| ch.is_whitespace() || SPECIAL_TOKENS.iter().any(|t| *t == ch))
361 .unwrap_or(true)
362}
363
364const SPECIAL_TOKENS: [char; 5] = ['(', ')', '\'', ',', ';'];