#[derive(PartialEq, Debug)]
pub enum Expression<'a> {
Number(f64),
Bool(bool),
Str(&'a str),
Symbol(&'a str),
List(Vec<Expression<'a>>),
Null,
}
#[derive(PartialEq, Debug, Clone)]
pub enum OwnedExpression {
Number(f64),
Bool(bool),
Str(String),
Symbol(String),
List(Vec<OwnedExpression>),
Null,
}
#[derive(Debug, thiserror::Error)]
pub enum ParseError {
#[error("Unexpected EOF")]
UnexpectedEOF,
#[error("Missing closing parenthesis")]
MissingClosingParen,
#[error("Unexpected closing parenthesis")]
UnexpectedClosingParen,
}
impl<'a> Expression<'a> {
pub fn to_owned(&self) -> OwnedExpression {
match self {
Expression::Number(n) => OwnedExpression::Number(*n),
Expression::Bool(b) => OwnedExpression::Bool(*b),
Expression::Str(s) => OwnedExpression::Str(s.to_string()),
Expression::Symbol(s) => OwnedExpression::Symbol(s.to_string()),
Expression::List(list) => OwnedExpression::List(
list.iter().map(|expr| expr.to_owned()).collect()
),
Expression::Null => OwnedExpression::Null,
}
}
}
fn tokenize(src: &str) -> Vec<&str> {
let mut tokens = Vec::with_capacity(src.len() / 2);
let mut current = src;
while !current.is_empty() {
current = current.trim_start();
if current.is_empty() { break; }
let (token, rest) = match current.find(|c: char| c.is_whitespace() || "()'".contains(c)) {
Some(pos) => {
let token = ¤t[..pos];
let rest = ¤t[pos..];
(token, rest)
}
None => (current, ""),
};
if !token.is_empty() {
tokens.push(token);
}
if !rest.is_empty() {
let delimiter = &rest[..1];
if "()'".contains(delimiter) {
tokens.push(delimiter);
}
current = &rest[1..];
} else {
break;
}
}
tokens
}
fn parse<'a>(tokens: &mut &[&'a str]) -> Result<Expression<'a>, ParseError> {
if tokens.is_empty() {
return Err(ParseError::UnexpectedEOF);
}
let token = tokens[0];
*tokens = &tokens[1..];
match token {
"(" => {
let mut stack = Vec::with_capacity(8);
while !tokens.is_empty() && tokens[0] != ")" {
stack.push(parse(tokens)?);
}
if tokens.is_empty() {
return Err(ParseError::MissingClosingParen);
}
*tokens = &tokens[1..]; Ok(Expression::List(stack))
}
")" => Err(ParseError::UnexpectedClosingParen),
_ => Ok(parse_atom(token)),
}
}
fn parse_atom(token: &str) -> Expression {
if token.len() == 1 {
return Expression::Symbol(token);
}
if let Some(first) = token.chars().next() {
if first.is_ascii_digit() || first == '-' || first == '+' {
if let Ok(n) = token.parse::<f64>() {
return Expression::Number(n);
}
}
}
match token {
"true" => return Expression::Bool(true),
"false" => return Expression::Bool(false),
"null" => return Expression::Null,
_ => {}
}
if token.len() >= 2 && token.starts_with('"') && token.ends_with('"') {
if let Some(content) = token.get(1..token.len()-1) {
return Expression::Str(content);
}
}
Expression::Symbol(token)
}
pub fn read(src: &str) -> Result<Expression, ParseError> {
let tokens = tokenize(src);
let mut token_slice = tokens.as_slice();
parse(&mut token_slice)
}
pub fn read_unchecked(src: &str) -> Expression {
read(src).expect("Failed to parse S-expression")
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn tokenize_test() {
assert_eq!(tokenize("this is a test"), vec!["this", "is", "a", "test"]);
assert_eq!(tokenize("(hello world)"), vec!["(", "hello", "world", ")"]);
}
#[test]
fn read_test() {
let result = read("(1 symbol \"string\" true null (1.5))").unwrap();
println!("{:?}", result);
assert!(read("(unclosed").is_err());
assert!(read(")unexpected").is_err());
}
#[test]
fn fast_path_tests() {
let result = read("a").unwrap();
assert!(matches!(result, Expression::Symbol("a")));
let result = read("42").unwrap();
assert!(matches!(result, Expression::Number(42.0)));
let result = read("-3.14").unwrap();
assert!(matches!(result, Expression::Number(-3.14)));
}
#[test]
fn performance_test() {
let input = "(define (factorial n) (if (= n 0) 1 (* n (factorial (- n 1)))))";
let start = std::time::Instant::now();
for _ in 0..1000 {
let _ = read(input);
}
let duration = start.elapsed();
println!("Parsed 1000 times in {:?}", duration);
}
}