1use std::{cell::RefCell, collections::HashMap, ops::Div};
2
3#[cfg(debug_assertions)]
4#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
5struct ArenaId(u32);
6
7#[cfg(not(debug_assertions))]
8#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
9struct ArenaId;
10
11impl ArenaId {
12 fn new() -> ArenaId {
13 #[cfg(debug_assertions)]
14 {
15 use std::sync::atomic::{AtomicU32, Ordering};
16 static ARENA_ID_COUNTER: AtomicU32 = AtomicU32::new(0);
17 let id = ARENA_ID_COUNTER.fetch_add(1, Ordering::SeqCst);
18 ArenaId(id)
19 }
20 #[cfg(not(debug_assertions))]
21 {
22 ArenaId
23 }
24 }
25}
26
27#[derive(Clone, Copy, PartialEq, Eq, Hash)]
28pub struct SExpr {
29 index: u32,
33
34 arena_id: ArenaId,
37}
38
39impl SExpr {
40 const TAG_MASK: u32 = 0b11 << 30;
41
42 const TAG_ATOM: u32 = 0b00;
43 const TAG_LIST: u32 = 0b01;
44 const TAG_STRING: u32 = 0b10;
45
46 fn tag(&self) -> u32 {
47 self.index >> 30
48 }
49
50 pub fn is_atom(&self) -> bool {
52 self.tag() == Self::TAG_ATOM
53 }
54
55 pub fn is_list(&self) -> bool {
57 self.tag() == Self::TAG_LIST
58 }
59
60 pub fn is_string(&self) -> bool {
62 self.tag() == Self::TAG_STRING
63 }
64
65 fn atom(index: u32, arena_id: ArenaId) -> Self {
66 assert_eq!(index & Self::TAG_MASK, 0);
67 SExpr { index, arena_id }
68 }
69
70 fn list(index: u32, arena_id: ArenaId) -> Self {
71 assert_eq!(index & Self::TAG_MASK, 0);
72 let index = index | (Self::TAG_LIST << 30);
73 SExpr { index, arena_id }
74 }
75
76 fn string(index: u32, arena_id: ArenaId) -> Self {
77 assert_eq!(index & Self::TAG_MASK, 0);
78 let index = index | (Self::TAG_STRING << 30);
79 SExpr { index, arena_id }
80 }
81
82 fn index(&self) -> usize {
83 (self.index & !Self::TAG_MASK) as usize
84 }
85}
86
87impl std::fmt::Debug for SExpr {
88 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
89 f.debug_tuple(if self.is_atom() {
90 "SExpr::Atom"
91 } else {
92 "SExpr::List"
93 })
94 .field(&self.index())
95 .finish()
96 }
97}
98
99struct ArenaInner {
100 id: ArenaId,
103
104 strings: Vec<String>,
106
107 string_map: HashMap<&'static str, u32>,
109
110 lists: Vec<Vec<SExpr>>,
112
113 list_map: HashMap<&'static [SExpr], SExpr>,
115}
116
117impl ArenaInner {
118 pub fn intern_string(&mut self, s: impl Into<String>) -> u32 {
119 let ix = self.strings.len() as u32;
120
121 let s: String = s.into();
122
123 let s_ref: &'static str = unsafe { std::mem::transmute(s.as_str()) };
126 self.strings.push(s);
127 self.string_map.insert(s_ref, ix);
128
129 ix
130 }
131}
132
133pub(crate) struct Arena(RefCell<ArenaInner>);
134
135impl Arena {
136 pub fn new() -> Self {
137 Self(RefCell::new(ArenaInner {
138 id: ArenaId::new(),
139 strings: Vec::new(),
140 string_map: HashMap::new(),
141 lists: Vec::new(),
142 list_map: HashMap::new(),
143 }))
144 }
145
146 pub fn atom(&self, name: impl Into<String> + AsRef<str>) -> SExpr {
147 let mut inner = self.0.borrow_mut();
148 let ix = if let Some(ix) = inner.string_map.get(name.as_ref()) {
149 *ix
150 } else {
151 inner.intern_string(name)
152 };
153 SExpr::atom(ix as u32, inner.id)
154 }
155
156 fn string(&self, s: &str) -> SExpr {
157 let mut inner = self.0.borrow_mut();
158 let ix = if let Some(ix) = inner.string_map.get(s) {
159 *ix
160 } else {
161 inner.intern_string(s)
162 };
163 SExpr::string(ix, inner.id)
164 }
165
166 pub fn list(&self, list: Vec<SExpr>) -> SExpr {
167 let mut inner = self.0.borrow_mut();
168 if let Some(sexpr) = inner.list_map.get(&list.as_slice()) {
169 *sexpr
170 } else {
171 let ix = inner.lists.len();
172 let sexpr = SExpr::list(ix as u32, inner.id);
173
174 let list_ref: &'static [SExpr] = unsafe { std::mem::transmute(list.as_slice()) };
177 inner.list_map.insert(list_ref, sexpr);
178 inner.lists.push(list);
179
180 sexpr
181 }
182 }
183
184 pub fn display(&self, sexpr: SExpr) -> DisplayExpr {
185 DisplayExpr { arena: self, sexpr }
186 }
187
188 pub fn get(&self, expr: SExpr) -> SExprData<'_> {
189 let inner = self.0.borrow();
190
191 debug_assert_eq!(
192 inner.id, expr.arena_id,
193 "Use of an `SExpr` with the wrong `Context`! An `SExpr` may only be \
194 used with the `Context` from which it was created!"
195 );
196
197 if expr.is_atom() {
198 let data = unsafe { std::mem::transmute(inner.strings[expr.index()].as_str()) };
201 SExprData::Atom(data)
202 } else if expr.is_list() {
203 let data = unsafe { std::mem::transmute(inner.lists[expr.index()].as_slice()) };
206 SExprData::List(data)
207 } else if expr.is_string() {
208 let data = unsafe { std::mem::transmute(inner.strings[expr.index()].as_str()) };
211 SExprData::String(data)
212 } else {
213 unreachable!()
214 }
215 }
216
217 pub fn get_atom(&self, expr: SExpr) -> Option<&str> {
218 if !expr.is_atom() {
219 return None;
220 }
221
222 let inner = self.0.borrow();
223 let data = unsafe { std::mem::transmute(inner.strings[expr.index()].as_str()) };
226 Some(data)
227 }
228
229 pub fn get_str(&self, expr: SExpr) -> Option<&str> {
230 if !expr.is_string() {
231 return None;
232 }
233
234 let inner = self.0.borrow();
235 let data = unsafe { std::mem::transmute(inner.strings[expr.index()].as_str()) };
238 Some(data)
239 }
240
241 pub fn get_list(&self, expr: SExpr) -> Option<&[SExpr]> {
242 if !expr.is_list() {
243 return None;
244 }
245
246 let inner = self.0.borrow();
247 let data = unsafe { std::mem::transmute(inner.lists[expr.index()].as_slice()) };
250 return Some(data);
251 }
252
253 pub(crate) fn get_i<T: TryParseInt>(&self, expr: SExpr) -> Option<T> {
254 let inner = self.0.borrow();
255
256 if expr.is_atom() {
257 let data = inner.strings[expr.index()].as_str();
258 return T::try_parse_t(data, false);
259 }
260
261 if expr.is_list() {
262 let data = inner.lists[expr.index()].as_slice();
263
264 if data.len() != 2 || data.iter().any(|expr| !expr.is_atom()) {
265 return None;
266 }
267
268 let is_negated = match inner.strings[data[0].index()].as_str() {
269 "-" => true,
270 "+" => false,
271 _ => return None,
272 };
273
274 let r_data = inner.strings[data[1].index()].as_str();
275
276 return T::try_parse_t(r_data, is_negated);
277 }
278
279 None
280 }
281
282 pub(crate) fn get_f<T: TryParseFloat + Div<Output = T>>(&self, expr: SExpr) -> Option<T> {
283 let inner = self.0.borrow();
284
285 if expr.is_atom() {
286 let data = inner.strings[expr.index()].as_str();
287 return T::try_parse_t(data, false);
288 }
289
290 if expr.is_list() {
291 let mut data = inner.lists[expr.index()].as_slice();
292
293 if !([1, 2, 3].contains(&data.len())) || !data[0].is_atom() {
294 return None;
295 }
296
297 let mut index = 0;
298 let is_negated = match inner.strings[data[0].index()].as_str() {
299 "-" => {
300 index += 1;
301 true
302 }
303 "+" => {
304 index += 1;
305 false
306 }
307 _ => false,
308 };
309
310 if data.len() == 2 && !data[1].is_atom() {
312 data = inner.lists[data[1].index()].as_slice();
313 index = 0;
314 }
315
316 let data = &data[index..];
317
318 if data.len() == 1 {
319 return T::try_parse_t(inner.strings[data[0].index()].as_str(), is_negated);
320 }
321
322 if data.len() == 3 && inner.strings[data[0].index()].as_str() == "/" {
324 let numerator =
325 T::try_parse_t(inner.strings[data[1].index()].as_str(), is_negated)?;
326 let denominator = T::try_parse_t(inner.strings[data[2].index()].as_str(), false)?;
327 return Some(numerator / denominator);
328 }
329 }
330
331 None
332 }
333}
334
335pub(crate) trait TryParseInt: Sized {
336 fn try_parse_t(a: &str, negate: bool) -> Option<Self>;
337}
338
339macro_rules! impl_get_int {
340 ( $( $ty:ty )* ) => {
341 $(
342 impl TryParseInt for $ty {
343 fn try_parse_t(a: &str, negate: bool) -> Option<Self> {
344 let x = if let Some(a) = a.strip_prefix("#x") {
345 <$ty>::from_str_radix(a, 16).ok()?
346 } else if let Some(a) = a.strip_prefix("#b") {
347 <$ty>::from_str_radix(a , 2).ok()?
348 } else {
349 a.parse::<$ty>().ok()?
350 };
351
352 if negate {
353 return x.checked_neg();
354 }
355
356 Some(x)
357 }
358 }
359 )*
360 };
361}
362
363impl_get_int!(u8 u16 u32 u64 u128 usize i8 i16 i32 i64 i128 isize);
364
365pub(crate) trait TryParseFloat: Sized {
366 fn try_parse_t(a: &str, negate: bool) -> Option<Self>;
367}
368
369macro_rules! impl_get_float {
370 ( $( $ty:ty )* ) => {
371 $(
372 impl TryParseFloat for $ty {
373 fn try_parse_t(a: &str, negate: bool) -> Option<Self> {
374 let mut x = a.parse::<$ty>().ok()?;
375
376 if negate {
377 x = -x;
378 }
379
380 Some(x)
381 }
382 }
383 )*
384 };
385}
386
387impl_get_float!(f32 f64);
388
389#[derive(Debug)]
407pub enum SExprData<'a> {
408 Atom(&'a str),
409 String(&'a str),
410 List(&'a [SExpr]),
411}
412
413#[derive(Debug)]
416#[non_exhaustive]
417pub enum IntFromSExprError {
418 NotAnAtom,
421
422 ParseIntError(std::num::ParseIntError),
424}
425
426impl std::fmt::Display for IntFromSExprError {
427 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
428 match self {
429 IntFromSExprError::NotAnAtom => write!(
430 f,
431 "The s-expr is a list, not an atom, and \
432 therefore cannot be converted to an integer."
433 ),
434 IntFromSExprError::ParseIntError(_) => {
435 write!(f, "There wasn an error parsing the atom as an integer.")
436 }
437 }
438 }
439}
440
441impl std::error::Error for IntFromSExprError {
442 fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
443 match self {
444 IntFromSExprError::NotAnAtom => None,
445 IntFromSExprError::ParseIntError(inner) => Some(inner as _),
446 }
447 }
448}
449
450impl From<std::num::ParseIntError> for IntFromSExprError {
451 fn from(e: std::num::ParseIntError) -> Self {
452 IntFromSExprError::ParseIntError(e)
453 }
454}
455
456macro_rules! impl_try_from_int {
457 ( $( $ty:ty )* ) => {
458 $(
459 impl TryFrom<SExprData<'_>> for $ty {
460 type Error = IntFromSExprError;
461
462 fn try_from(value: SExprData<'_>) -> Result<Self, Self::Error> {
463 match value {
464 SExprData::Atom(a) => {
465 if let Some(a) = a.strip_prefix("#x") {
466 let x = <$ty>::from_str_radix(a, 16)?;
467 return Ok(x);
468 }
469
470 if let Some(a) = a.strip_prefix("#b") {
471 let x = <$ty>::from_str_radix(a, 2)?;
472 return Ok(x);
473 }
474
475 let x = a.parse::<$ty>()?;
476 Ok(x)
477 }
478 SExprData::String(_) | SExprData::List(_) => Err(IntFromSExprError::NotAnAtom),
479 }
480 }
481 }
482 )*
483 };
484}
485
486impl_try_from_int!(u8 u16 u32 u64 u128 usize);
487
488pub struct DisplayExpr<'a> {
489 arena: &'a Arena,
490 sexpr: SExpr,
491}
492
493impl<'a> std::fmt::Display for DisplayExpr<'a> {
494 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
495 return fmt_sexpr(f, self.arena, self.sexpr);
496
497 fn fmt_sexpr(f: &mut std::fmt::Formatter, arena: &Arena, sexpr: SExpr) -> std::fmt::Result {
498 match arena.get(sexpr) {
499 SExprData::Atom(data) => std::fmt::Display::fmt(data, f),
500 SExprData::String(data) => std::fmt::Debug::fmt(data, f),
501 SExprData::List(data) => {
502 write!(f, "(")?;
503 let mut sep = "";
504 for s in data {
505 std::fmt::Display::fmt(sep, f)?;
506 fmt_sexpr(f, arena, *s)?;
507 sep = " ";
508 }
509 write!(f, ")")
510 }
511 }
512 }
513 }
514}
515
516#[derive(Debug)]
517pub(crate) enum ParseError {
518 Message(String),
520
521 More,
523}
524
525#[cfg(test)]
526impl ParseError {
527 fn expect_message(self) -> String {
528 match self {
529 ParseError::Message(msg) => msg,
530 ParseError::More => panic!("Expected a ParseError::Message"),
531 }
532 }
533
534 fn expect_more(self) {
535 match self {
536 ParseError::Message(_) => panic!("Expected a ParseError::More"),
537 ParseError::More => (),
538 }
539 }
540}
541
542pub(crate) struct Parser {
543 context: Vec<Vec<SExpr>>,
544}
545
546impl Parser {
547 pub(crate) fn new() -> Self {
548 Self {
549 context: Vec::new(),
550 }
551 }
552
553 pub(crate) fn reset(&mut self) {
554 self.context.clear();
555 }
556
557 fn atom(&mut self, arena: &Arena, sym: &str) -> Option<SExpr> {
558 let expr = arena.atom(sym);
559 if let Some(outer) = self.context.last_mut() {
560 outer.push(expr);
561 None
562 } else {
563 Some(expr)
564 }
565 }
566
567 fn string(&mut self, arena: &Arena, sym: &str) -> Option<SExpr> {
568 let expr = arena.string(sym);
569 if let Some(outer) = self.context.last_mut() {
570 outer.push(expr);
571 None
572 } else {
573 Some(expr)
574 }
575 }
576
577 fn app(&mut self, arena: &Arena) -> Option<SExpr> {
578 if let Some(args) = self.context.pop() {
579 let expr = arena.list(args);
580 if let Some(outer) = self.context.last_mut() {
581 outer.push(expr);
582 } else {
583 return Some(expr);
584 }
585 }
586 None
587 }
588
589 pub(crate) fn parse(&mut self, arena: &Arena, bytes: &str) -> Result<SExpr, ParseError> {
590 let lexer = Lexer::new(bytes);
591 for token in lexer {
592 match token {
593 Token::Symbol(sym) => {
594 let res = self.atom(arena, sym);
595 if let Some(res) = res {
596 return Ok(res);
597 }
598 }
599
600 Token::String(lit) => {
601 let res = self.string(arena, lit);
602 if let Some(res) = res {
603 return Ok(res);
604 }
605 }
606
607 Token::LParen => self.context.push(Vec::new()),
608
609 Token::RParen => {
610 let res = self.app(arena);
611 if let Some(res) = res {
612 return Ok(res);
613 }
614 }
615
616 Token::Error(msg) => {
617 return Err(ParseError::Message(msg));
618 }
619 }
620 }
621
622 Err(ParseError::More)
623 }
624}
625
626#[derive(Debug)]
627enum Token<'a> {
628 LParen,
629 RParen,
630 Symbol(&'a str),
631 String(&'a str),
632 Error(String),
633}
634
635struct Lexer<'a> {
636 chars: &'a str,
637 indices: std::iter::Peekable<std::str::CharIndices<'a>>,
638}
639
640impl<'a> Lexer<'a> {
641 fn new(chars: &'a str) -> Self {
642 Self {
643 chars,
644 indices: chars.char_indices().peekable(),
645 }
646 }
647
648 fn scan_symbol(&mut self, start: usize, is_quote: bool) -> Token<'a> {
650 let mut quoted = is_quote;
652 let mut end;
653
654 loop {
655 if let Some((ix, c)) = self.indices.peek() {
656 end = *ix;
657 if quoted && *c != '|' {
658 self.indices.next();
660 continue;
661 } else if *c == '|' {
662 quoted = !quoted;
664 self.indices.next();
665 continue;
666 } else if c.is_alphabetic() || c.is_numeric() || "~!@$%^&*_-+=<>.?/".contains(*c) {
667 self.indices.next();
668 continue;
669 }
670 } else {
671 end = self.chars.len();
672 }
673
674 break;
675 }
676
677 if quoted {
678 return Token::Error(format!("Unterminated `|` in symbol starting at {start}"));
679 }
680
681 Token::Symbol(&self.chars[start..end])
682 }
683
684 fn scan_string(&mut self, start: usize) -> Token<'a> {
687 while let Some((ix, c)) = self.indices.next() {
688 if c == '\\' {
689 self.indices.next();
690 continue;
691 }
692
693 if c == '"' {
694 return Token::String(&self.chars[start + 1..ix]);
695 }
696 }
697
698 Token::Error(format!(
699 "Failed to find terminator for string literal at offset {start}"
700 ))
701 }
702}
703
704impl<'a> Iterator for Lexer<'a> {
705 type Item = Token<'a>;
706
707 fn next(&mut self) -> Option<Self::Item> {
708 while let Some((start, c)) = self.indices.next() {
709 match c {
710 '(' => {
711 return Some(Token::LParen);
712 }
713
714 ')' => {
715 return Some(Token::RParen);
716 }
717
718 '"' => {
719 return Some(self.scan_string(start));
720 }
721
722 ';' => self.indices = self.chars[0..0].char_indices().peekable(),
725
726 c if c.is_whitespace() => {}
727
728 c => return Some(self.scan_symbol(start, c == '|')),
729 }
730 }
731
732 None
733 }
734}
735
736#[cfg(test)]
737mod tests {
738 use super::{Arena, Parser, SExprData};
739 use crate::ContextBuilder;
740
741 #[test]
742 fn is_atom() {
743 let ctx = ContextBuilder::new().build().unwrap();
744 let pizza = ctx.atom("pizza");
745 assert!(pizza.is_atom());
746 assert!(!pizza.is_list());
747 }
748
749 #[test]
750 fn is_list() {
751 let ctx = ContextBuilder::new().build().unwrap();
752 let toppings = ctx.list(vec![
753 ctx.atom("tomato-sauce"),
754 ctx.atom("mozzarella"),
755 ctx.atom("basil"),
756 ]);
757 assert!(toppings.is_list());
758 assert!(!toppings.is_atom());
759 }
760
761 #[test]
762 fn parses_string_lit() {
763 let arena = Arena::new();
764 let mut p = Parser::new();
765
766 let expr = p.parse(&arena, "(error \"line 3 column 16: Parsing function declaration. Expecting sort list '(' got :a\")").expect("Parsing a list with a string literal");
767
768 assert!(expr.is_list());
769
770 let SExprData::List(es) = arena.get(expr) else {
771 panic!("Failed to parse a list");
772 };
773
774 assert_eq!(es.len(), 2);
775
776 assert!(es[0].is_atom());
777 assert!(es[1].is_string());
778
779 match arena.get(es[1]) {
780 SExprData::String(text) => assert_eq!(
781 text,
782 "line 3 column 16: Parsing function declaration. Expecting sort list '(' got :a"
783 ),
784 _ => unreachable!(),
785 };
786
787 let expr = p
788 .parse(&arena, "\"\"")
789 .expect("Parsing the empty string literal");
790
791 assert!(expr.is_string());
792 match arena.get(expr) {
793 SExprData::String(text) => assert!(text.is_empty()),
794 _ => unreachable!(),
795 }
796 }
797
798 #[test]
799 fn parse_error() {
800 let arena = Arena::new();
801 let mut p = Parser::new();
802
803 let err = p
804 .parse(&arena, "(error \"line)")
805 .expect_err("Unterminated string literal should fail to parse")
806 .expect_message();
807
808 assert_eq!(
809 err,
810 "Failed to find terminator for string literal at offset 7"
811 );
812 }
813
814 #[test]
815 fn parse_multi_line() {
816 let arena = Arena::new();
817 let mut p = Parser::new();
818
819 p.parse(&arena, "(open (extra \"sequence\")")
820 .expect_err("Open list should expect more")
821 .expect_more();
822
823 p.parse(&arena, "b")
824 .expect_err("Single atom doesn't close a list")
825 .expect_more();
826
827 let expr = p
828 .parse(&arena, ")")
829 .expect("Closing paren should finish the parse");
830
831 let SExprData::List(es) = arena.get(expr) else {
832 panic!("Failed to parse a list");
833 };
834
835 assert_eq!(es.len(), 3);
836 assert!(es[0].is_atom());
837 assert!(es[1].is_list());
838 assert!(es[2].is_atom());
839 }
840}