use thiserror::Error;
use crate::sexpr::{Sexpr, SexprKind};
/// Source location attached to a parsed expression.
///
/// As filled in by the parser in this file, `start..end` is a half-open byte
/// range into the source text, and `line`/`column` describe where `start`
/// falls.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Span {
/// Byte offset of the first byte of the expression.
pub start: usize,
/// Byte offset one past the last byte of the expression.
pub end: usize,
/// Line on which the expression begins (the parser starts counting at 1).
pub line: usize,
/// Column at which the expression begins (the parser starts counting at 1
/// and advances one column per byte).
pub column: usize,
}
/// Errors reported while parsing S-expression source text.
///
/// Every variant carries the `line` and `column` the parser associated with
/// the problem; the `#[error]` attributes define the user-facing messages.
#[derive(Debug, Error, Clone, PartialEq)]
pub enum ParseError {
/// A character that cannot begin an expression was found where an
/// expression was expected. The parser also uses `ch == '\0'` for the case
/// where the input ends at that point.
#[error("unexpected character {ch:?} at line {line}, column {column}")]
UnexpectedChar {
ch: char,
line: usize,
column: usize,
},
/// The input ended inside a string literal; `line`/`column` locate the
/// opening quote.
#[error("unterminated string starting at line {line}, column {column}")]
UnterminatedString { line: usize, column: usize },
/// The input ended before a list was closed; `line`/`column` locate the
/// opening parenthesis.
#[error("unterminated list (missing ')') opened at line {line}, column {column}")]
UnterminatedList { line: usize, column: usize },
/// A `)` appeared where an expression was expected.
#[error("unexpected ')' at line {line}, column {column}")]
UnexpectedRParen { line: usize, column: usize },
/// A backslash inside a string literal was followed by an unsupported
/// character `ch`; `line`/`column` locate that character.
#[error("invalid escape sequence \\{ch} at line {line}, column {column}")]
InvalidEscape {
ch: char,
line: usize,
column: usize,
},
}
/// Parses every top-level expression contained in `source`.
///
/// Whitespace and `;` line comments between expressions are skipped.
///
/// # Errors
///
/// Returns the first [`ParseError`] encountered; expressions parsed before
/// the error are discarded.
pub fn parse(source: &str) -> Result<Vec<Sexpr>, ParseError> {
    let mut parser = Parser::new(source);
    let mut forms = Vec::new();

    // Position the cursor on the first expression (or at end of input).
    parser.skip_whitespace_and_comments();
    while !parser.eof() {
        let form = parser.parse_one()?;
        forms.push(form);
        // Swallow trailing trivia so the loop condition sees a real end of input.
        parser.skip_whitespace_and_comments();
    }

    Ok(forms)
}
/// Cursor over the raw bytes of the source text, with line/column tracking.
struct Parser<'a> {
/// The source text viewed as bytes (obtained from a `&str` in `new`).
source: &'a [u8],
/// Byte offset of the next unread byte.
pos: usize,
/// Line of the next unread byte; starts at 1 and increments after each `\n`.
line: usize,
/// Column of the next unread byte; starts at 1, resets to 1 after `\n`,
/// and otherwise advances by one per byte consumed.
col: usize,
}
impl<'a> Parser<'a> {
fn new(source: &'a str) -> Self {
Self {
source: source.as_bytes(),
pos: 0,
line: 1,
col: 1,
}
}
fn eof(&self) -> bool {
self.pos >= self.source.len()
}
fn peek(&self) -> Option<u8> {
self.source.get(self.pos).copied()
}
fn advance(&mut self) -> Option<u8> {
let b = self.peek()?;
self.pos += 1;
if b == b'\n' {
self.line += 1;
self.col = 1;
} else {
self.col += 1;
}
Some(b)
}
fn skip_whitespace_and_comments(&mut self) {
while let Some(b) = self.peek() {
match b {
b' ' | b'\t' | b'\r' | b'\n' => {
self.advance();
}
b';' => {
while let Some(b) = self.peek() {
if b == b'\n' {
break;
}
self.advance();
}
}
_ => break,
}
}
}
fn parse_one(&mut self) -> Result<Sexpr, ParseError> {
self.skip_whitespace_and_comments();
let start = self.pos;
let line = self.line;
let col = self.col;
let Some(b) = self.peek() else {
return Err(ParseError::UnexpectedChar {
ch: '\0',
line,
column: col,
});
};
match b {
b'(' => self.parse_list(),
b')' => Err(ParseError::UnexpectedRParen { line, column: col }),
b'"' => self.parse_string(),
b if b.is_ascii_digit() || b == b'-' || b == b'+' => {
if (b == b'-' || b == b'+')
&& self
.source
.get(self.pos + 1)
.is_none_or(|c| !c.is_ascii_digit())
{
self.parse_symbol(start, line, col)
} else {
self.parse_number(start, line, col)
}
}
b if b.is_ascii_alphabetic() || b == b'_' => {
self.parse_symbol(start, line, col)
}
_ => Err(ParseError::UnexpectedChar {
ch: b as char,
line,
column: col,
}),
}
}
fn parse_list(&mut self) -> Result<Sexpr, ParseError> {
let start = self.pos;
let line = self.line;
let col = self.col;
self.advance(); let mut items = Vec::new();
loop {
self.skip_whitespace_and_comments();
match self.peek() {
None => {
return Err(ParseError::UnterminatedList { line, column: col });
}
Some(b')') => {
self.advance();
let end = self.pos;
return Ok(Sexpr {
kind: SexprKind::List(items),
span: Span {
start,
end,
line,
column: col,
},
});
}
_ => {
items.push(self.parse_one()?);
}
}
}
}
fn parse_string(&mut self) -> Result<Sexpr, ParseError> {
let start = self.pos;
let line = self.line;
let col = self.col;
self.advance(); let mut s = String::new();
loop {
let Some(b) = self.peek() else {
return Err(ParseError::UnterminatedString { line, column: col });
};
match b {
b'"' => {
self.advance();
let end = self.pos;
return Ok(Sexpr {
kind: SexprKind::String(s),
span: Span {
start,
end,
line,
column: col,
},
});
}
b'\\' => {
self.advance();
let Some(esc) = self.peek() else {
return Err(ParseError::UnterminatedString { line, column: col });
};
let ch = match esc {
b'"' => '"',
b'\\' => '\\',
b'n' => '\n',
b't' => '\t',
b'r' => '\r',
other => {
return Err(ParseError::InvalidEscape {
ch: other as char,
line: self.line,
column: self.col,
});
}
};
s.push(ch);
self.advance();
}
_ => {
s.push(b as char);
self.advance();
}
}
}
}
fn parse_symbol(
&mut self,
start: usize,
line: usize,
col: usize,
) -> Result<Sexpr, ParseError> {
while let Some(b) = self.peek() {
if b.is_ascii_alphanumeric() || b == b'_' || b == b'-' {
self.advance();
} else {
break;
}
}
let end = self.pos;
let s = std::str::from_utf8(&self.source[start..end])
.expect("source is utf8")
.to_string();
Ok(Sexpr {
kind: SexprKind::Symbol(s),
span: Span {
start,
end,
line,
column: col,
},
})
}
fn parse_number(
&mut self,
start: usize,
line: usize,
col: usize,
) -> Result<Sexpr, ParseError> {
self.advance();
while let Some(b) = self.peek() {
if b.is_ascii_digit() || b == b'.' {
self.advance();
} else {
break;
}
}
let end = self.pos;
let s = std::str::from_utf8(&self.source[start..end])
.expect("source is utf8")
.to_string();
Ok(Sexpr {
kind: SexprKind::Number(s),
span: Span {
start,
end,
line,
column: col,
},
})
}
}