1use std::{cell::RefCell, collections::HashMap};
2
3#[cfg(debug_assertions)]
4#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
5struct ArenaId(u32);
6
7#[cfg(not(debug_assertions))]
8#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
9struct ArenaId;
10
11impl ArenaId {
12 fn new() -> ArenaId {
13 #[cfg(debug_assertions)]
14 {
15 use std::sync::atomic::{AtomicU32, Ordering};
16 static ARENA_ID_COUNTER: AtomicU32 = AtomicU32::new(0);
17 let id = ARENA_ID_COUNTER.fetch_add(1, Ordering::SeqCst);
18 ArenaId(id)
19 }
20 #[cfg(not(debug_assertions))]
21 {
22 ArenaId
23 }
24 }
25}
26
27#[derive(Clone, Copy, PartialEq, Eq, Hash)]
28pub struct SExpr {
29 index: u32,
33
34 arena_id: ArenaId,
37}
38
39impl SExpr {
40 const TAG_MASK: u32 = 0b11 << 30;
41
42 const TAG_ATOM: u32 = 0b00;
43 const TAG_LIST: u32 = 0b01;
44 const TAG_STRING: u32 = 0b10;
45
46 fn tag(&self) -> u32 {
47 self.index >> 30
48 }
49
50 pub fn is_atom(&self) -> bool {
52 self.tag() == Self::TAG_ATOM
53 }
54
55 pub fn is_list(&self) -> bool {
57 self.tag() == Self::TAG_LIST
58 }
59
60 pub fn is_string(&self) -> bool {
62 self.tag() == Self::TAG_STRING
63 }
64
65 fn atom(index: u32, arena_id: ArenaId) -> Self {
66 assert_eq!(index & Self::TAG_MASK, 0);
67 SExpr { index, arena_id }
68 }
69
70 fn list(index: u32, arena_id: ArenaId) -> Self {
71 assert_eq!(index & Self::TAG_MASK, 0);
72 let index = index | (Self::TAG_LIST << 30);
73 SExpr { index, arena_id }
74 }
75
76 fn string(index: u32, arena_id: ArenaId) -> Self {
77 assert_eq!(index & Self::TAG_MASK, 0);
78 let index = index | (Self::TAG_STRING << 30);
79 SExpr { index, arena_id }
80 }
81
82 fn index(&self) -> usize {
83 (self.index & !Self::TAG_MASK) as usize
84 }
85}
86
87impl std::fmt::Debug for SExpr {
88 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
89 f.debug_tuple(if self.is_atom() {
90 "SExpr::Atom"
91 } else {
92 "SExpr::List"
93 })
94 .field(&self.index())
95 .finish()
96 }
97}
98
99struct ArenaInner {
100 id: ArenaId,
103
104 strings: Vec<String>,
106
107 string_map: HashMap<&'static str, u32>,
109
110 lists: Vec<Vec<SExpr>>,
112
113 list_map: HashMap<&'static [SExpr], SExpr>,
115}
116
117impl ArenaInner {
118 pub fn intern_string(&mut self, s: impl Into<String>) -> u32 {
119 let ix = self.strings.len() as u32;
120
121 let s: String = s.into();
122
123 let s_ref: &'static str = unsafe { std::mem::transmute(s.as_str()) };
126 self.strings.push(s);
127 self.string_map.insert(s_ref, ix);
128
129 ix
130 }
131}
132
133pub(crate) struct Arena(RefCell<ArenaInner>);
134
135impl Arena {
136 pub fn new() -> Self {
137 Self(RefCell::new(ArenaInner {
138 id: ArenaId::new(),
139 strings: Vec::new(),
140 string_map: HashMap::new(),
141 lists: Vec::new(),
142 list_map: HashMap::new(),
143 }))
144 }
145
146 pub fn atom(&self, name: impl Into<String> + AsRef<str>) -> SExpr {
147 let mut inner = self.0.borrow_mut();
148 let ix = if let Some(ix) = inner.string_map.get(name.as_ref()) {
149 *ix
150 } else {
151 inner.intern_string(name)
152 };
153 SExpr::atom(ix as u32, inner.id)
154 }
155
156 fn string(&self, s: &str) -> SExpr {
157 let mut inner = self.0.borrow_mut();
158 let ix = if let Some(ix) = inner.string_map.get(s) {
159 *ix
160 } else {
161 inner.intern_string(s)
162 };
163 SExpr::string(ix, inner.id)
164 }
165
166 pub fn list(&self, list: Vec<SExpr>) -> SExpr {
167 let mut inner = self.0.borrow_mut();
168 if let Some(sexpr) = inner.list_map.get(&list.as_slice()) {
169 *sexpr
170 } else {
171 let ix = inner.lists.len();
172 let sexpr = SExpr::list(ix as u32, inner.id);
173
174 let list_ref: &'static [SExpr] = unsafe { std::mem::transmute(list.as_slice()) };
177 inner.list_map.insert(list_ref, sexpr);
178 inner.lists.push(list);
179
180 sexpr
181 }
182 }
183
184 pub fn display(&self, sexpr: SExpr) -> DisplayExpr {
185 DisplayExpr { arena: self, sexpr }
186 }
187
188 pub fn get(&self, expr: SExpr) -> SExprData<'_> {
189 let inner = self.0.borrow();
190
191 debug_assert_eq!(
192 inner.id, expr.arena_id,
193 "Use of an `SExpr` with the wrong `Context`! An `SExpr` may only be \
194 used with the `Context` from which it was created!"
195 );
196
197 if expr.is_atom() {
198 let data = unsafe { std::mem::transmute(inner.strings[expr.index()].as_str()) };
201 SExprData::Atom(data)
202 } else if expr.is_list() {
203 let data = unsafe { std::mem::transmute(inner.lists[expr.index()].as_slice()) };
206 SExprData::List(data)
207 } else if expr.is_string() {
208 let data = unsafe { std::mem::transmute(inner.strings[expr.index()].as_str()) };
211 SExprData::String(data)
212 } else {
213 unreachable!()
214 }
215 }
216}
217
218#[derive(Debug)]
235pub enum SExprData<'a> {
236 Atom(&'a str),
237 String(&'a str),
238 List(&'a [SExpr]),
239}
240
241#[derive(Debug)]
244#[non_exhaustive]
245pub enum IntFromSExprError {
246 NotAnAtom,
249
250 ParseIntError(std::num::ParseIntError),
252}
253
254impl std::fmt::Display for IntFromSExprError {
255 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
256 match self {
257 IntFromSExprError::NotAnAtom => write!(
258 f,
259 "The s-expr is a list, not an atom, and \
260 therefore cannot be converted to an integer."
261 ),
262 IntFromSExprError::ParseIntError(_) => {
263 write!(f, "There wasn an error parsing the atom as an integer.")
264 }
265 }
266 }
267}
268
269impl std::error::Error for IntFromSExprError {
270 fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
271 match self {
272 IntFromSExprError::NotAnAtom => None,
273 IntFromSExprError::ParseIntError(inner) => Some(inner as _),
274 }
275 }
276}
277
278impl From<std::num::ParseIntError> for IntFromSExprError {
279 fn from(e: std::num::ParseIntError) -> Self {
280 IntFromSExprError::ParseIntError(e)
281 }
282}
283
284macro_rules! impl_try_from_int {
285 ( $( $ty:ty )* ) => {
286 $(
287 impl TryFrom<SExprData<'_>> for $ty {
288 type Error = IntFromSExprError;
289
290 fn try_from(value: SExprData<'_>) -> Result<Self, Self::Error> {
291 match value {
292 SExprData::Atom(a) => {
293 if let Some(a) = a.strip_prefix("#x") {
294 let x = <$ty>::from_str_radix(a, 16)?;
295 return Ok(x);
296 }
297
298 if let Some(a) = a.strip_prefix("#b") {
299 let x = <$ty>::from_str_radix(a, 2)?;
300 return Ok(x);
301 }
302
303 let x = a.parse::<$ty>()?;
304 Ok(x)
305 }
306 SExprData::String(_) | SExprData::List(_) => Err(IntFromSExprError::NotAnAtom),
307 }
308 }
309 }
310 )*
311 };
312}
313
314impl_try_from_int!(u8 u16 u32 u64 u128 usize);
315
316pub struct DisplayExpr<'a> {
317 arena: &'a Arena,
318 sexpr: SExpr,
319}
320
321impl<'a> std::fmt::Display for DisplayExpr<'a> {
322 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
323 return fmt_sexpr(f, self.arena, self.sexpr);
324
325 fn fmt_sexpr(f: &mut std::fmt::Formatter, arena: &Arena, sexpr: SExpr) -> std::fmt::Result {
326 match arena.get(sexpr) {
327 SExprData::Atom(data) => std::fmt::Display::fmt(data, f),
328 SExprData::String(data) => std::fmt::Debug::fmt(data, f),
329 SExprData::List(data) => {
330 write!(f, "(")?;
331 let mut sep = "";
332 for s in data {
333 std::fmt::Display::fmt(sep, f)?;
334 fmt_sexpr(f, arena, *s)?;
335 sep = " ";
336 }
337 write!(f, ")")
338 }
339 }
340 }
341 }
342}
343
344#[derive(Debug)]
345pub(crate) enum ParseError {
346 Message(String),
348
349 More,
351}
352
353#[cfg(test)]
354impl ParseError {
355 fn expect_message(self) -> String {
356 match self {
357 ParseError::Message(msg) => msg,
358 ParseError::More => panic!("Expected a ParseError::Message"),
359 }
360 }
361
362 fn expect_more(self) {
363 match self {
364 ParseError::Message(_) => panic!("Expected a ParseError::More"),
365 ParseError::More => (),
366 }
367 }
368}
369
370pub(crate) struct Parser {
371 context: Vec<Vec<SExpr>>,
372}
373
374impl Parser {
375 pub(crate) fn new() -> Self {
376 Self {
377 context: Vec::new(),
378 }
379 }
380
381 pub(crate) fn reset(&mut self) {
382 self.context.clear();
383 }
384
385 fn atom(&mut self, arena: &Arena, sym: &str) -> Option<SExpr> {
386 let expr = arena.atom(sym);
387 if let Some(outer) = self.context.last_mut() {
388 outer.push(expr);
389 None
390 } else {
391 Some(expr)
392 }
393 }
394
395 fn string(&mut self, arena: &Arena, sym: &str) -> Option<SExpr> {
396 let expr = arena.string(sym);
397 if let Some(outer) = self.context.last_mut() {
398 outer.push(expr);
399 None
400 } else {
401 Some(expr)
402 }
403 }
404
405 fn app(&mut self, arena: &Arena) -> Option<SExpr> {
406 if let Some(args) = self.context.pop() {
407 let expr = arena.list(args);
408 if let Some(outer) = self.context.last_mut() {
409 outer.push(expr);
410 } else {
411 return Some(expr);
412 }
413 }
414 None
415 }
416
417 pub(crate) fn parse(&mut self, arena: &Arena, bytes: &str) -> Result<SExpr, ParseError> {
418 let lexer = Lexer::new(bytes);
419 for token in lexer {
420 match token {
421 Token::Symbol(sym) => {
422 let res = self.atom(arena, sym);
423 if let Some(res) = res {
424 return Ok(res);
425 }
426 }
427
428 Token::String(lit) => {
429 let res = self.string(arena, lit);
430 if let Some(res) = res {
431 return Ok(res);
432 }
433 }
434
435 Token::LParen => self.context.push(Vec::new()),
436
437 Token::RParen => {
438 let res = self.app(arena);
439 if let Some(res) = res {
440 return Ok(res);
441 }
442 }
443
444 Token::Error(msg) => {
445 return Err(ParseError::Message(msg));
446 }
447 }
448 }
449
450 Err(ParseError::More)
451 }
452}
453
454#[derive(Debug)]
455enum Token<'a> {
456 LParen,
457 RParen,
458 Symbol(&'a str),
459 String(&'a str),
460 Error(String),
461}
462
463struct Lexer<'a> {
464 chars: &'a str,
465 indices: std::iter::Peekable<std::str::CharIndices<'a>>,
466}
467
468impl<'a> Lexer<'a> {
469 fn new(chars: &'a str) -> Self {
470 Self {
471 chars,
472 indices: chars.char_indices().peekable(),
473 }
474 }
475
476 fn scan_symbol(&mut self, start: usize, is_quote: bool) -> Token<'a> {
478 let mut quoted = is_quote;
480 let mut end;
481
482 loop {
483 if let Some((ix, c)) = self.indices.peek() {
484 end = *ix;
485 if quoted && *c != '|' {
486 self.indices.next();
488 continue;
489 } else if *c == '|' {
490 quoted = !quoted;
492 self.indices.next();
493 continue;
494 } else if c.is_alphabetic() || c.is_numeric() || "~!@$%^&*_-+=<>.?/".contains(*c) {
495 self.indices.next();
496 continue;
497 }
498 } else {
499 end = self.chars.len();
500 }
501
502 break;
503 }
504
505 if quoted {
506 return Token::Error(format!("Unterminated `|` in symbol starting at {start}"));
507 }
508
509 Token::Symbol(&self.chars[start..end])
510 }
511
512 fn scan_string(&mut self, start: usize) -> Token<'a> {
515 while let Some((ix, c)) = self.indices.next() {
516 if c == '\\' {
517 self.indices.next();
518 continue;
519 }
520
521 if c == '"' {
522 return Token::String(&self.chars[start + 1..ix]);
523 }
524 }
525
526 Token::Error(format!(
527 "Failed to find terminator for string literal at offset {start}"
528 ))
529 }
530}
531
532impl<'a> Iterator for Lexer<'a> {
533 type Item = Token<'a>;
534
535 fn next(&mut self) -> Option<Self::Item> {
536 while let Some((start, c)) = self.indices.next() {
537 match c {
538 '(' => {
539 return Some(Token::LParen);
540 }
541
542 ')' => {
543 return Some(Token::RParen);
544 }
545
546 '"' => {
547 return Some(self.scan_string(start));
548 }
549
550 ';' => self.indices = self.chars[0..0].char_indices().peekable(),
553
554 c if c.is_whitespace() => {}
555
556 c => return Some(self.scan_symbol(start, c == '|')),
557 }
558 }
559
560 None
561 }
562}
563
564#[cfg(test)]
565mod tests {
566 use super::{Arena, Parser, SExprData};
567 use crate::ContextBuilder;
568
569 #[test]
570 fn is_atom() {
571 let ctx = ContextBuilder::new().build().unwrap();
572 let pizza = ctx.atom("pizza");
573 assert!(pizza.is_atom());
574 assert!(!pizza.is_list());
575 }
576
577 #[test]
578 fn is_list() {
579 let ctx = ContextBuilder::new().build().unwrap();
580 let toppings = ctx.list(vec![
581 ctx.atom("tomato-sauce"),
582 ctx.atom("mozzarella"),
583 ctx.atom("basil"),
584 ]);
585 assert!(toppings.is_list());
586 assert!(!toppings.is_atom());
587 }
588
589 #[test]
590 fn parses_string_lit() {
591 let arena = Arena::new();
592 let mut p = Parser::new();
593
594 let expr = p.parse(&arena, "(error \"line 3 column 16: Parsing function declaration. Expecting sort list '(' got :a\")").expect("Parsing a list with a string literal");
595
596 assert!(expr.is_list());
597
598 let SExprData::List(es) = arena.get(expr) else {
599 panic!("Failed to parse a list");
600 };
601
602 assert_eq!(es.len(), 2);
603
604 assert!(es[0].is_atom());
605 assert!(es[1].is_string());
606
607 match arena.get(es[1]) {
608 SExprData::String(text) => assert_eq!(
609 text,
610 "line 3 column 16: Parsing function declaration. Expecting sort list '(' got :a"
611 ),
612 _ => unreachable!(),
613 };
614
615 let expr = p
616 .parse(&arena, "\"\"")
617 .expect("Parsing the empty string literal");
618
619 assert!(expr.is_string());
620 match arena.get(expr) {
621 SExprData::String(text) => assert!(text.is_empty()),
622 _ => unreachable!(),
623 }
624 }
625
626 #[test]
627 fn parse_error() {
628 let arena = Arena::new();
629 let mut p = Parser::new();
630
631 let err = p
632 .parse(&arena, "(error \"line)")
633 .expect_err("Unterminated string literal should fail to parse")
634 .expect_message();
635
636 assert_eq!(
637 err,
638 "Failed to find terminator for string literal at offset 7"
639 );
640 }
641
642 #[test]
643 fn parse_multi_line() {
644 let arena = Arena::new();
645 let mut p = Parser::new();
646
647 p.parse(&arena, "(open (extra \"sequence\")")
648 .expect_err("Open list should expect more")
649 .expect_more();
650
651 p.parse(&arena, "b")
652 .expect_err("Single atom doesn't close a list")
653 .expect_more();
654
655 let expr = p.parse(&arena, ")")
656 .expect("Closing paren should finish the parse");
657
658 let SExprData::List(es) = arena.get(expr) else {
659 panic!("Failed to parse a list");
660 };
661
662 assert_eq!(es.len(), 3);
663 assert!(es[0].is_atom());
664 assert!(es[1].is_list());
665 assert!(es[2].is_atom());
666 }
667}