use std::{cell::RefCell, collections::HashMap};
#[cfg(debug_assertions)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
struct ArenaId(u32);
#[cfg(not(debug_assertions))]
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
struct ArenaId;
impl ArenaId {
fn new() -> ArenaId {
#[cfg(debug_assertions)]
{
use std::sync::atomic::{AtomicU32, Ordering};
static ARENA_ID_COUNTER: AtomicU32 = AtomicU32::new(0);
let id = ARENA_ID_COUNTER.fetch_add(1, Ordering::SeqCst);
ArenaId(id)
}
#[cfg(not(debug_assertions))]
{
ArenaId
}
}
}
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
pub struct SExpr {
index: u32,
arena_id: ArenaId,
}
impl SExpr {
const TAG_MASK: u32 = 0b11 << 30;
const TAG_ATOM: u32 = 0b00;
const TAG_LIST: u32 = 0b01;
const TAG_STRING: u32 = 0b10;
fn tag(&self) -> u32 {
self.index >> 30
}
pub fn is_atom(&self) -> bool {
self.tag() == Self::TAG_ATOM
}
pub fn is_list(&self) -> bool {
self.tag() == Self::TAG_LIST
}
pub fn is_string(&self) -> bool {
self.tag() == Self::TAG_STRING
}
fn atom(index: u32, arena_id: ArenaId) -> Self {
assert_eq!(index & Self::TAG_MASK, 0);
SExpr { index, arena_id }
}
fn list(index: u32, arena_id: ArenaId) -> Self {
assert_eq!(index & Self::TAG_MASK, 0);
let index = index | (Self::TAG_LIST << 30);
SExpr { index, arena_id }
}
fn string(index: u32, arena_id: ArenaId) -> Self {
assert_eq!(index & Self::TAG_MASK, 0);
let index = index | (Self::TAG_STRING << 30);
SExpr { index, arena_id }
}
fn index(&self) -> usize {
(self.index & !Self::TAG_MASK) as usize
}
}
impl std::fmt::Debug for SExpr {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_tuple(if self.is_atom() {
"SExpr::Atom"
} else {
"SExpr::List"
})
.field(&self.index())
.finish()
}
}
struct ArenaInner {
id: ArenaId,
strings: Vec<String>,
string_map: HashMap<&'static str, u32>,
lists: Vec<Vec<SExpr>>,
list_map: HashMap<&'static [SExpr], SExpr>,
}
impl ArenaInner {
pub fn intern_string(&mut self, s: impl Into<String>) -> u32 {
let ix = self.strings.len() as u32;
let s: String = s.into();
let s_ref: &'static str = unsafe { std::mem::transmute(s.as_str()) };
self.strings.push(s);
self.string_map.insert(s_ref, ix);
ix
}
}
pub(crate) struct Arena(RefCell<ArenaInner>);
impl Arena {
pub fn new() -> Self {
Self(RefCell::new(ArenaInner {
id: ArenaId::new(),
strings: Vec::new(),
string_map: HashMap::new(),
lists: Vec::new(),
list_map: HashMap::new(),
}))
}
pub fn atom(&self, name: impl Into<String> + AsRef<str>) -> SExpr {
let mut inner = self.0.borrow_mut();
let ix = if let Some(ix) = inner.string_map.get(name.as_ref()) {
*ix
} else {
inner.intern_string(name)
};
SExpr::atom(ix as u32, inner.id)
}
fn string(&self, s: &str) -> SExpr {
let mut inner = self.0.borrow_mut();
let ix = if let Some(ix) = inner.string_map.get(s) {
*ix
} else {
inner.intern_string(s)
};
SExpr::string(ix, inner.id)
}
pub fn list(&self, list: Vec<SExpr>) -> SExpr {
let mut inner = self.0.borrow_mut();
if let Some(sexpr) = inner.list_map.get(&list.as_slice()) {
*sexpr
} else {
let ix = inner.lists.len();
let sexpr = SExpr::list(ix as u32, inner.id);
let list_ref: &'static [SExpr] = unsafe { std::mem::transmute(list.as_slice()) };
inner.list_map.insert(list_ref, sexpr);
inner.lists.push(list);
sexpr
}
}
pub fn display(&self, sexpr: SExpr) -> DisplayExpr {
DisplayExpr { arena: self, sexpr }
}
pub fn get(&self, expr: SExpr) -> SExprData<'_> {
let inner = self.0.borrow();
debug_assert_eq!(
inner.id, expr.arena_id,
"Use of an `SExpr` with the wrong `Context`! An `SExpr` may only be \
used with the `Context` from which it was created!"
);
if expr.is_atom() {
let data = unsafe { std::mem::transmute(inner.strings[expr.index()].as_str()) };
SExprData::Atom(data)
} else if expr.is_list() {
let data = unsafe { std::mem::transmute(inner.lists[expr.index()].as_slice()) };
SExprData::List(data)
} else if expr.is_string() {
let data = unsafe { std::mem::transmute(inner.strings[expr.index()].as_str()) };
SExprData::String(data)
} else {
unreachable!()
}
}
}
#[derive(Debug)]
pub enum SExprData<'a> {
Atom(&'a str),
String(&'a str),
List(&'a [SExpr]),
}
#[derive(Debug)]
#[non_exhaustive]
pub enum IntFromSExprError {
NotAnAtom,
ParseIntError(std::num::ParseIntError),
}
impl std::fmt::Display for IntFromSExprError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
IntFromSExprError::NotAnAtom => write!(
f,
"The s-expr is a list, not an atom, and \
therefore cannot be converted to an integer."
),
IntFromSExprError::ParseIntError(_) => {
write!(f, "There wasn an error parsing the atom as an integer.")
}
}
}
}
impl std::error::Error for IntFromSExprError {
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
match self {
IntFromSExprError::NotAnAtom => None,
IntFromSExprError::ParseIntError(inner) => Some(inner as _),
}
}
}
impl From<std::num::ParseIntError> for IntFromSExprError {
fn from(e: std::num::ParseIntError) -> Self {
IntFromSExprError::ParseIntError(e)
}
}
macro_rules! impl_try_from_int {
( $( $ty:ty )* ) => {
$(
impl TryFrom<SExprData<'_>> for $ty {
type Error = IntFromSExprError;
fn try_from(value: SExprData<'_>) -> Result<Self, Self::Error> {
match value {
SExprData::Atom(a) => {
if let Some(a) = a.strip_prefix("#x") {
let x = <$ty>::from_str_radix(a, 16)?;
return Ok(x);
}
if let Some(a) = a.strip_prefix("#b") {
let x = <$ty>::from_str_radix(a, 2)?;
return Ok(x);
}
let x = a.parse::<$ty>()?;
Ok(x)
}
SExprData::String(_) | SExprData::List(_) => Err(IntFromSExprError::NotAnAtom),
}
}
}
)*
};
}
impl_try_from_int!(u8 u16 u32 u64 u128 usize);
pub struct DisplayExpr<'a> {
arena: &'a Arena,
sexpr: SExpr,
}
impl<'a> std::fmt::Display for DisplayExpr<'a> {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
return fmt_sexpr(f, self.arena, self.sexpr);
fn fmt_sexpr(f: &mut std::fmt::Formatter, arena: &Arena, sexpr: SExpr) -> std::fmt::Result {
match arena.get(sexpr) {
SExprData::Atom(data) => std::fmt::Display::fmt(data, f),
SExprData::String(data) => std::fmt::Debug::fmt(data, f),
SExprData::List(data) => {
write!(f, "(")?;
let mut sep = "";
for s in data {
std::fmt::Display::fmt(sep, f)?;
fmt_sexpr(f, arena, *s)?;
sep = " ";
}
write!(f, ")")
}
}
}
}
}
pub(crate) struct Parser {
context: Vec<Vec<SExpr>>,
}
impl Parser {
pub(crate) fn new() -> Self {
Self {
context: Vec::new(),
}
}
pub(crate) fn reset(&mut self) {
self.context.clear();
}
fn atom(&mut self, arena: &Arena, sym: &str) -> Option<SExpr> {
let expr = arena.atom(sym);
if let Some(outer) = self.context.last_mut() {
outer.push(expr);
None
} else {
Some(expr)
}
}
fn string(&mut self, arena: &Arena, sym: &str) -> Option<SExpr> {
let expr = arena.string(sym);
if let Some(outer) = self.context.last_mut() {
outer.push(expr);
None
} else {
Some(expr)
}
}
fn app(&mut self, arena: &Arena) -> Option<SExpr> {
if let Some(args) = self.context.pop() {
let expr = arena.list(args);
if let Some(outer) = self.context.last_mut() {
outer.push(expr);
} else {
return Some(expr);
}
}
None
}
pub(crate) fn parse(&mut self, arena: &Arena, bytes: &str) -> Result<SExpr, Option<String>> {
let lexer = Lexer::new(bytes);
for token in lexer {
match token {
Token::Symbol(sym) => {
let res = self.atom(arena, sym);
if let Some(res) = res {
return Ok(res);
}
}
Token::String(lit) => {
let res = self.string(arena, lit);
if let Some(res) = res {
return Ok(res);
}
}
Token::LParen => self.context.push(Vec::new()),
Token::RParen => {
let res = self.app(arena);
if let Some(res) = res {
return Ok(res);
}
}
Token::Error(msg) => {
return Err(Some(msg));
}
}
}
Err(None)
}
}
#[derive(Debug)]
enum Token<'a> {
LParen,
RParen,
Symbol(&'a str),
String(&'a str),
Error(String),
}
struct Lexer<'a> {
chars: &'a str,
indices: std::iter::Peekable<std::str::CharIndices<'a>>,
}
impl<'a> Lexer<'a> {
fn new(chars: &'a str) -> Self {
Self {
chars,
indices: chars.char_indices().peekable(),
}
}
fn scan_symbol(&mut self, start: usize, is_quote: bool) -> Token<'a> {
let mut quoted = is_quote;
let mut end;
loop {
if let Some((ix, c)) = self.indices.peek() {
end = *ix;
if quoted && *c != '|' {
self.indices.next();
continue;
} else if *c == '|' {
quoted = !quoted;
self.indices.next();
continue;
} else if c.is_alphabetic() || c.is_numeric() || "~!@$%^&*_-+=<>.?/".contains(*c) {
self.indices.next();
continue;
}
} else {
end = self.chars.len();
}
break;
}
if quoted {
return Token::Error(format!("Unterminated `|` in symbol starting at {start}"));
}
Token::Symbol(&self.chars[start..end])
}
fn scan_string(&mut self, start: usize) -> Token<'a> {
while let Some((ix, c)) = self.indices.next() {
if c == '\\' {
self.indices.next();
continue;
}
if c == '"' {
return Token::String(&self.chars[start + 1..ix]);
}
}
Token::Error(format!(
"Failed to find terminator for string literal at offset {start}"
))
}
}
impl<'a> Iterator for Lexer<'a> {
type Item = Token<'a>;
fn next(&mut self) -> Option<Self::Item> {
while let Some((start, c)) = self.indices.next() {
match c {
'(' => {
return Some(Token::LParen);
}
')' => {
return Some(Token::RParen);
}
'"' => {
return Some(self.scan_string(start));
}
';' => self.indices = self.chars[0..0].char_indices().peekable(),
c if c.is_whitespace() => {}
c => return Some(self.scan_symbol(start, c == '|')),
}
}
None
}
}
#[cfg(test)]
mod tests {
use super::{Arena, Parser, SExprData};
use crate::ContextBuilder;
#[test]
fn is_atom() {
let ctx = ContextBuilder::new().build().unwrap();
let pizza = ctx.atom("pizza");
assert!(pizza.is_atom());
assert!(!pizza.is_list());
}
#[test]
fn is_list() {
let ctx = ContextBuilder::new().build().unwrap();
let toppings = ctx.list(vec![
ctx.atom("tomato-sauce"),
ctx.atom("mozzarella"),
ctx.atom("basil"),
]);
assert!(toppings.is_list());
assert!(!toppings.is_atom());
}
#[test]
fn parses_string_lit() {
let arena = Arena::new();
let mut p = Parser::new();
let expr = p.parse(&arena, "(error \"line 3 column 16: Parsing function declaration. Expecting sort list '(' got :a\")").expect("Parsing a list with a string literal");
assert!(expr.is_list());
let SExprData::List(es) = arena.get(expr) else {
panic!("Failed to parse a list");
};
assert_eq!(es.len(), 2);
assert!(es[0].is_atom());
assert!(es[1].is_string());
match arena.get(es[1]) {
SExprData::String(text) => assert_eq!(
text,
"line 3 column 16: Parsing function declaration. Expecting sort list '(' got :a"
),
_ => unreachable!(),
};
let expr = p
.parse(&arena, "\"\"")
.expect("Parsing the empty string literal");
assert!(expr.is_string());
match arena.get(expr) {
SExprData::String(text) => assert!(text.is_empty()),
_ => unreachable!(),
}
}
#[test]
fn parse_error() {
let arena = Arena::new();
let mut p = Parser::new();
let err = p.parse(&arena, "(error \"line)")
.expect_err("Unterminated string literal should fail to parse")
.expect("String literal errors should have error messages");
assert_eq!(err, "Failed to find terminator for string literal at offset 7");
}
}