1use thiserror::Error;
15
16use crate::sexpr::{Sexpr, SexprKind};
17
/// Source location of a single parsed form.
///
/// The parser records the position at which a form begins and the byte offset
/// reached once the form has been fully consumed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Span {
    /// Byte offset of the first byte of the form.
    pub start: usize,
    /// Byte offset just past the last byte of the form (exclusive).
    pub end: usize,
    /// 1-based line on which the form starts.
    pub line: usize,
    /// 1-based column at which the form starts; the parser advances the column
    /// once per source byte.
    pub column: usize,
}
28
29#[derive(Debug, Error, Clone, PartialEq)]
30pub enum ParseError {
31 #[error("unexpected character {ch:?} at line {line}, column {column}")]
32 UnexpectedChar {
33 ch: char,
34 line: usize,
35 column: usize,
36 },
37 #[error("unterminated string starting at line {line}, column {column}")]
38 UnterminatedString { line: usize, column: usize },
39 #[error("unterminated list (missing ')') opened at line {line}, column {column}")]
40 UnterminatedList { line: usize, column: usize },
41 #[error("unexpected ')' at line {line}, column {column}")]
42 UnexpectedRParen { line: usize, column: usize },
43 #[error("invalid escape sequence \\{ch} at line {line}, column {column}")]
44 InvalidEscape {
45 ch: char,
46 line: usize,
47 column: usize,
48 },
49}
50
51pub fn parse(source: &str) -> Result<Vec<Sexpr>, ParseError> {
53 let mut p = Parser::new(source);
54 let mut out = Vec::new();
55 loop {
56 p.skip_whitespace_and_comments();
57 if p.eof() {
58 return Ok(out);
59 }
60 out.push(p.parse_one()?);
61 }
62}
63
64struct Parser<'a> {
65 source: &'a [u8],
66 pos: usize,
67 line: usize,
68 col: usize,
69}
70
71impl<'a> Parser<'a> {
72 fn new(source: &'a str) -> Self {
73 Self {
74 source: source.as_bytes(),
75 pos: 0,
76 line: 1,
77 col: 1,
78 }
79 }
80
81 fn eof(&self) -> bool {
82 self.pos >= self.source.len()
83 }
84
85 fn peek(&self) -> Option<u8> {
86 self.source.get(self.pos).copied()
87 }
88
89 fn advance(&mut self) -> Option<u8> {
90 let b = self.peek()?;
91 self.pos += 1;
92 if b == b'\n' {
93 self.line += 1;
94 self.col = 1;
95 } else {
96 self.col += 1;
97 }
98 Some(b)
99 }
100
101 fn skip_whitespace_and_comments(&mut self) {
102 while let Some(b) = self.peek() {
103 match b {
104 b' ' | b'\t' | b'\r' | b'\n' => {
105 self.advance();
106 }
107 b';' => {
108 while let Some(b) = self.peek() {
109 if b == b'\n' {
110 break;
111 }
112 self.advance();
113 }
114 }
115 _ => break,
116 }
117 }
118 }
119
120 fn parse_one(&mut self) -> Result<Sexpr, ParseError> {
121 self.skip_whitespace_and_comments();
122 let start = self.pos;
123 let line = self.line;
124 let col = self.col;
125 let Some(b) = self.peek() else {
126 return Err(ParseError::UnexpectedChar {
127 ch: '\0',
128 line,
129 column: col,
130 });
131 };
132 match b {
133 b'(' => self.parse_list(),
134 b')' => Err(ParseError::UnexpectedRParen { line, column: col }),
135 b'"' => self.parse_string(),
136 b if b.is_ascii_digit() || b == b'-' || b == b'+' => {
137 if (b == b'-' || b == b'+')
140 && self
141 .source
142 .get(self.pos + 1)
143 .is_none_or(|c| !c.is_ascii_digit())
144 {
145 self.parse_symbol(start, line, col)
146 } else {
147 self.parse_number(start, line, col)
148 }
149 }
150 b if b.is_ascii_alphabetic() || b == b'_' => {
151 self.parse_symbol(start, line, col)
152 }
153 _ => Err(ParseError::UnexpectedChar {
154 ch: b as char,
155 line,
156 column: col,
157 }),
158 }
159 }
160
161 fn parse_list(&mut self) -> Result<Sexpr, ParseError> {
162 let start = self.pos;
163 let line = self.line;
164 let col = self.col;
165 self.advance(); let mut items = Vec::new();
167 loop {
168 self.skip_whitespace_and_comments();
169 match self.peek() {
170 None => {
171 return Err(ParseError::UnterminatedList { line, column: col });
172 }
173 Some(b')') => {
174 self.advance();
175 let end = self.pos;
176 return Ok(Sexpr {
177 kind: SexprKind::List(items),
178 span: Span {
179 start,
180 end,
181 line,
182 column: col,
183 },
184 });
185 }
186 _ => {
187 items.push(self.parse_one()?);
188 }
189 }
190 }
191 }
192
193 fn parse_string(&mut self) -> Result<Sexpr, ParseError> {
194 let start = self.pos;
195 let line = self.line;
196 let col = self.col;
197 self.advance(); let mut s = String::new();
199 loop {
200 let Some(b) = self.peek() else {
201 return Err(ParseError::UnterminatedString { line, column: col });
202 };
203 match b {
204 b'"' => {
205 self.advance();
206 let end = self.pos;
207 return Ok(Sexpr {
208 kind: SexprKind::String(s),
209 span: Span {
210 start,
211 end,
212 line,
213 column: col,
214 },
215 });
216 }
217 b'\\' => {
218 self.advance();
219 let Some(esc) = self.peek() else {
220 return Err(ParseError::UnterminatedString { line, column: col });
221 };
222 let ch = match esc {
223 b'"' => '"',
224 b'\\' => '\\',
225 b'n' => '\n',
226 b't' => '\t',
227 b'r' => '\r',
228 other => {
229 return Err(ParseError::InvalidEscape {
230 ch: other as char,
231 line: self.line,
232 column: self.col,
233 });
234 }
235 };
236 s.push(ch);
237 self.advance();
238 }
239 _ => {
240 s.push(b as char);
241 self.advance();
242 }
243 }
244 }
245 }
246
247 fn parse_symbol(
248 &mut self,
249 start: usize,
250 line: usize,
251 col: usize,
252 ) -> Result<Sexpr, ParseError> {
253 while let Some(b) = self.peek() {
254 if b.is_ascii_alphanumeric() || b == b'_' || b == b'-' {
255 self.advance();
256 } else {
257 break;
258 }
259 }
260 let end = self.pos;
261 let s = std::str::from_utf8(&self.source[start..end])
262 .expect("source is utf8")
263 .to_string();
264 Ok(Sexpr {
265 kind: SexprKind::Symbol(s),
266 span: Span {
267 start,
268 end,
269 line,
270 column: col,
271 },
272 })
273 }
274
275 fn parse_number(
276 &mut self,
277 start: usize,
278 line: usize,
279 col: usize,
280 ) -> Result<Sexpr, ParseError> {
281 self.advance();
284 while let Some(b) = self.peek() {
285 if b.is_ascii_digit() || b == b'.' {
286 self.advance();
287 } else {
288 break;
289 }
290 }
291 let end = self.pos;
292 let s = std::str::from_utf8(&self.source[start..end])
293 .expect("source is utf8")
294 .to_string();
295 Ok(Sexpr {
296 kind: SexprKind::Number(s),
297 span: Span {
298 start,
299 end,
300 line,
301 column: col,
302 },
303 })
304 }
305}