easy_smt/
sexpr.rs

1use std::{cell::RefCell, collections::HashMap};
2
3#[cfg(debug_assertions)]
4#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
5struct ArenaId(u32);
6
7#[cfg(not(debug_assertions))]
8#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
9struct ArenaId;
10
11impl ArenaId {
12    fn new() -> ArenaId {
13        #[cfg(debug_assertions)]
14        {
15            use std::sync::atomic::{AtomicU32, Ordering};
16            static ARENA_ID_COUNTER: AtomicU32 = AtomicU32::new(0);
17            let id = ARENA_ID_COUNTER.fetch_add(1, Ordering::SeqCst);
18            ArenaId(id)
19        }
20        #[cfg(not(debug_assertions))]
21        {
22            ArenaId
23        }
24    }
25}
26
27#[derive(Clone, Copy, PartialEq, Eq, Hash)]
28pub struct SExpr {
29    // The index of this `SExpr`'s data within `ArenaInner::atoms` or
30    // `ArenaInner::lists`. The top two bits are reserved to tag this as either
31    // an atom, a list, or a string literal.
32    index: u32,
33
34    // The ID of the arena that this `SExpr` is associated with. Used for debug
35    // assertions.
36    arena_id: ArenaId,
37}
38
39impl SExpr {
40    const TAG_MASK: u32 = 0b11 << 30;
41
42    const TAG_ATOM: u32 = 0b00;
43    const TAG_LIST: u32 = 0b01;
44    const TAG_STRING: u32 = 0b10;
45
46    fn tag(&self) -> u32 {
47        self.index >> 30
48    }
49
50    /// Is this `SExpr` an atom?
51    pub fn is_atom(&self) -> bool {
52        self.tag() == Self::TAG_ATOM
53    }
54
55    /// Is this `SExpr` a list?
56    pub fn is_list(&self) -> bool {
57        self.tag() == Self::TAG_LIST
58    }
59
60    /// Is this `SExpr` a string literal?
61    pub fn is_string(&self) -> bool {
62        self.tag() == Self::TAG_STRING
63    }
64
65    fn atom(index: u32, arena_id: ArenaId) -> Self {
66        assert_eq!(index & Self::TAG_MASK, 0);
67        SExpr { index, arena_id }
68    }
69
70    fn list(index: u32, arena_id: ArenaId) -> Self {
71        assert_eq!(index & Self::TAG_MASK, 0);
72        let index = index | (Self::TAG_LIST << 30);
73        SExpr { index, arena_id }
74    }
75
76    fn string(index: u32, arena_id: ArenaId) -> Self {
77        assert_eq!(index & Self::TAG_MASK, 0);
78        let index = index | (Self::TAG_STRING << 30);
79        SExpr { index, arena_id }
80    }
81
82    fn index(&self) -> usize {
83        (self.index & !Self::TAG_MASK) as usize
84    }
85}
86
87impl std::fmt::Debug for SExpr {
88    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
89        f.debug_tuple(if self.is_atom() {
90            "SExpr::Atom"
91        } else {
92            "SExpr::List"
93        })
94        .field(&self.index())
95        .finish()
96    }
97}
98
99struct ArenaInner {
100    /// The ID of this Arena. Used for debug asserts that any given `SExpr`
101    /// belongs to this arena.
102    id: ArenaId,
103
104    /// Interned strings.
105    strings: Vec<String>,
106
107    /// Backwards lookup for interned strings.
108    string_map: HashMap<&'static str, u32>,
109
110    /// Interned lists.
111    lists: Vec<Vec<SExpr>>,
112
113    /// Backwards lookup for interned lists.
114    list_map: HashMap<&'static [SExpr], SExpr>,
115}
116
117impl ArenaInner {
118    pub fn intern_string(&mut self, s: impl Into<String>) -> u32 {
119        let ix = self.strings.len() as u32;
120
121        let s: String = s.into();
122
123        // Safety argument: the name will live as long as the context as it is inserted into
124        // the vector below and never removed or resized.
125        let s_ref: &'static str = unsafe { std::mem::transmute(s.as_str()) };
126        self.strings.push(s);
127        self.string_map.insert(s_ref, ix);
128
129        ix
130    }
131}
132
133pub(crate) struct Arena(RefCell<ArenaInner>);
134
135impl Arena {
136    pub fn new() -> Self {
137        Self(RefCell::new(ArenaInner {
138            id: ArenaId::new(),
139            strings: Vec::new(),
140            string_map: HashMap::new(),
141            lists: Vec::new(),
142            list_map: HashMap::new(),
143        }))
144    }
145
146    pub fn atom(&self, name: impl Into<String> + AsRef<str>) -> SExpr {
147        let mut inner = self.0.borrow_mut();
148        let ix = if let Some(ix) = inner.string_map.get(name.as_ref()) {
149            *ix
150        } else {
151            inner.intern_string(name)
152        };
153        SExpr::atom(ix as u32, inner.id)
154    }
155
156    fn string(&self, s: &str) -> SExpr {
157        let mut inner = self.0.borrow_mut();
158        let ix = if let Some(ix) = inner.string_map.get(s) {
159            *ix
160        } else {
161            inner.intern_string(s)
162        };
163        SExpr::string(ix, inner.id)
164    }
165
166    pub fn list(&self, list: Vec<SExpr>) -> SExpr {
167        let mut inner = self.0.borrow_mut();
168        if let Some(sexpr) = inner.list_map.get(&list.as_slice()) {
169            *sexpr
170        } else {
171            let ix = inner.lists.len();
172            let sexpr = SExpr::list(ix as u32, inner.id);
173
174            // Safety argument: the name will live as long as the context as it is inserted into
175            // the vector below and never removed or resized.
176            let list_ref: &'static [SExpr] = unsafe { std::mem::transmute(list.as_slice()) };
177            inner.list_map.insert(list_ref, sexpr);
178            inner.lists.push(list);
179
180            sexpr
181        }
182    }
183
184    pub fn display(&self, sexpr: SExpr) -> DisplayExpr {
185        DisplayExpr { arena: self, sexpr }
186    }
187
188    pub fn get(&self, expr: SExpr) -> SExprData<'_> {
189        let inner = self.0.borrow();
190
191        debug_assert_eq!(
192            inner.id, expr.arena_id,
193            "Use of an `SExpr` with the wrong `Context`! An `SExpr` may only be \
194             used with the `Context` from which it was created!"
195        );
196
197        if expr.is_atom() {
198            // Safety argument: the data will live as long as the containing context, and is
199            // immutable once it's inserted, so using the lifteime of the Arena is acceptable.
200            let data = unsafe { std::mem::transmute(inner.strings[expr.index()].as_str()) };
201            SExprData::Atom(data)
202        } else if expr.is_list() {
203            // Safety argument: the data will live as long as the containing context, and is
204            // immutable once it's inserted, so using the lifteime of the Arena is acceptable.
205            let data = unsafe { std::mem::transmute(inner.lists[expr.index()].as_slice()) };
206            SExprData::List(data)
207        } else if expr.is_string() {
208            // Safety argument: the data will live as long as the containing context, and is
209            // immutable once it's inserted, so using the lifteime of the Arena is acceptable.
210            let data = unsafe { std::mem::transmute(inner.strings[expr.index()].as_str()) };
211            SExprData::String(data)
212        } else {
213            unreachable!()
214        }
215    }
216}
217
218/// The data contents of an [`SExpr`][crate::SExpr].
219///
220/// ## Converting `SExprData` to an Integer
221///
222/// There are `TryFrom<SExprData>` implementations for common integer types that
223/// you can use:
224///
225/// ```
226/// let mut ctx = easy_smt::ContextBuilder::new().build().unwrap();
227///
228/// let neg_one = ctx.binary(8, -1_i8);
229/// assert_eq!(ctx.display(neg_one).to_string(), "#b11111111");
230///
231/// let x = u8::try_from(ctx.get(neg_one)).unwrap();
232/// assert_eq!(x, 0xff);
233/// ```
234#[derive(Debug)]
235pub enum SExprData<'a> {
236    Atom(&'a str),
237    String(&'a str),
238    List(&'a [SExpr]),
239}
240
241/// An error which can be returned when trying to interpret an s-expr as an
242/// integer.
243#[derive(Debug)]
244#[non_exhaustive]
245pub enum IntFromSExprError {
246    /// The s-expr is a list, not an atom, and therefore cannot be converted to
247    /// an integer.
248    NotAnAtom,
249
250    /// There was an error parsing the atom as an integer.
251    ParseIntError(std::num::ParseIntError),
252}
253
254impl std::fmt::Display for IntFromSExprError {
255    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
256        match self {
257            IntFromSExprError::NotAnAtom => write!(
258                f,
259                "The s-expr is a list, not an atom, and \
260                 therefore cannot be converted to an integer."
261            ),
262            IntFromSExprError::ParseIntError(_) => {
263                write!(f, "There wasn an error parsing the atom as an integer.")
264            }
265        }
266    }
267}
268
269impl std::error::Error for IntFromSExprError {
270    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
271        match self {
272            IntFromSExprError::NotAnAtom => None,
273            IntFromSExprError::ParseIntError(inner) => Some(inner as _),
274        }
275    }
276}
277
278impl From<std::num::ParseIntError> for IntFromSExprError {
279    fn from(e: std::num::ParseIntError) -> Self {
280        IntFromSExprError::ParseIntError(e)
281    }
282}
283
284macro_rules! impl_try_from_int {
285    ( $( $ty:ty )* ) => {
286        $(
287            impl TryFrom<SExprData<'_>> for $ty {
288                type Error = IntFromSExprError;
289
290                fn try_from(value: SExprData<'_>) -> Result<Self, Self::Error> {
291                    match value {
292                        SExprData::Atom(a) => {
293                            if let Some(a) = a.strip_prefix("#x") {
294                                let x = <$ty>::from_str_radix(a, 16)?;
295                                return Ok(x);
296                            }
297
298                            if let Some(a) = a.strip_prefix("#b") {
299                                let x = <$ty>::from_str_radix(a, 2)?;
300                                return Ok(x);
301                            }
302
303                            let x = a.parse::<$ty>()?;
304                            Ok(x)
305                        }
306                        SExprData::String(_) | SExprData::List(_) => Err(IntFromSExprError::NotAnAtom),
307                    }
308                }
309            }
310        )*
311    };
312}
313
314impl_try_from_int!(u8 u16 u32 u64 u128 usize);
315
316pub struct DisplayExpr<'a> {
317    arena: &'a Arena,
318    sexpr: SExpr,
319}
320
321impl<'a> std::fmt::Display for DisplayExpr<'a> {
322    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
323        return fmt_sexpr(f, self.arena, self.sexpr);
324
325        fn fmt_sexpr(f: &mut std::fmt::Formatter, arena: &Arena, sexpr: SExpr) -> std::fmt::Result {
326            match arena.get(sexpr) {
327                SExprData::Atom(data) => std::fmt::Display::fmt(data, f),
328                SExprData::String(data) => std::fmt::Debug::fmt(data, f),
329                SExprData::List(data) => {
330                    write!(f, "(")?;
331                    let mut sep = "";
332                    for s in data {
333                        std::fmt::Display::fmt(sep, f)?;
334                        fmt_sexpr(f, arena, *s)?;
335                        sep = " ";
336                    }
337                    write!(f, ")")
338                }
339            }
340        }
341    }
342}
343
344#[derive(Debug)]
345pub(crate) enum ParseError {
346    /// Parsing failed.
347    Message(String),
348
349    /// More input is needed to finish parsing.
350    More,
351}
352
353#[cfg(test)]
354impl ParseError {
355    fn expect_message(self) -> String {
356        match self {
357            ParseError::Message(msg) => msg,
358            ParseError::More => panic!("Expected a ParseError::Message"),
359        }
360    }
361
362    fn expect_more(self) {
363        match self {
364            ParseError::Message(_) => panic!("Expected a ParseError::More"),
365            ParseError::More => (),
366        }
367    }
368}
369
370pub(crate) struct Parser {
371    context: Vec<Vec<SExpr>>,
372}
373
374impl Parser {
375    pub(crate) fn new() -> Self {
376        Self {
377            context: Vec::new(),
378        }
379    }
380
381    pub(crate) fn reset(&mut self) {
382        self.context.clear();
383    }
384
385    fn atom(&mut self, arena: &Arena, sym: &str) -> Option<SExpr> {
386        let expr = arena.atom(sym);
387        if let Some(outer) = self.context.last_mut() {
388            outer.push(expr);
389            None
390        } else {
391            Some(expr)
392        }
393    }
394
395    fn string(&mut self, arena: &Arena, sym: &str) -> Option<SExpr> {
396        let expr = arena.string(sym);
397        if let Some(outer) = self.context.last_mut() {
398            outer.push(expr);
399            None
400        } else {
401            Some(expr)
402        }
403    }
404
405    fn app(&mut self, arena: &Arena) -> Option<SExpr> {
406        if let Some(args) = self.context.pop() {
407            let expr = arena.list(args);
408            if let Some(outer) = self.context.last_mut() {
409                outer.push(expr);
410            } else {
411                return Some(expr);
412            }
413        }
414        None
415    }
416
417    pub(crate) fn parse(&mut self, arena: &Arena, bytes: &str) -> Result<SExpr, ParseError> {
418        let lexer = Lexer::new(bytes);
419        for token in lexer {
420            match token {
421                Token::Symbol(sym) => {
422                    let res = self.atom(arena, sym);
423                    if let Some(res) = res {
424                        return Ok(res);
425                    }
426                }
427
428                Token::String(lit) => {
429                    let res = self.string(arena, lit);
430                    if let Some(res) = res {
431                        return Ok(res);
432                    }
433                }
434
435                Token::LParen => self.context.push(Vec::new()),
436
437                Token::RParen => {
438                    let res = self.app(arena);
439                    if let Some(res) = res {
440                        return Ok(res);
441                    }
442                }
443
444                Token::Error(msg) => {
445                    return Err(ParseError::Message(msg));
446                }
447            }
448        }
449
450        Err(ParseError::More)
451    }
452}
453
454#[derive(Debug)]
455enum Token<'a> {
456    LParen,
457    RParen,
458    Symbol(&'a str),
459    String(&'a str),
460    Error(String),
461}
462
463struct Lexer<'a> {
464    chars: &'a str,
465    indices: std::iter::Peekable<std::str::CharIndices<'a>>,
466}
467
468impl<'a> Lexer<'a> {
469    fn new(chars: &'a str) -> Self {
470        Self {
471            chars,
472            indices: chars.char_indices().peekable(),
473        }
474    }
475
476    /// Scan the current symbol and return the complete lexed string.
477    fn scan_symbol(&mut self, start: usize, is_quote: bool) -> Token<'a> {
478        // Are we within a || pair?
479        let mut quoted = is_quote;
480        let mut end;
481
482        loop {
483            if let Some((ix, c)) = self.indices.peek() {
484                end = *ix;
485                if quoted && *c != '|' {
486                    // If we're in a quoted context, treat this as one identifier.
487                    self.indices.next();
488                    continue;
489                } else if *c == '|' {
490                    // If we see a quote, toggle the quoted flag.
491                    quoted = !quoted;
492                    self.indices.next();
493                    continue;
494                } else if c.is_alphabetic() || c.is_numeric() || "~!@$%^&*_-+=<>.?/".contains(*c) {
495                    self.indices.next();
496                    continue;
497                }
498            } else {
499                end = self.chars.len();
500            }
501
502            break;
503        }
504
505        if quoted {
506            return Token::Error(format!("Unterminated `|` in symbol starting at {start}"));
507        }
508
509        Token::Symbol(&self.chars[start..end])
510    }
511
512    /// Scan a string literal. `start` is expected to be the offset of the opening `"`. The scanned
513    /// string excludes both the start and end quotes.
514    fn scan_string(&mut self, start: usize) -> Token<'a> {
515        while let Some((ix, c)) = self.indices.next() {
516            if c == '\\' {
517                self.indices.next();
518                continue;
519            }
520
521            if c == '"' {
522                return Token::String(&self.chars[start + 1..ix]);
523            }
524        }
525
526        Token::Error(format!(
527            "Failed to find terminator for string literal at offset {start}"
528        ))
529    }
530}
531
532impl<'a> Iterator for Lexer<'a> {
533    type Item = Token<'a>;
534
535    fn next(&mut self) -> Option<Self::Item> {
536        while let Some((start, c)) = self.indices.next() {
537            match c {
538                '(' => {
539                    return Some(Token::LParen);
540                }
541
542                ')' => {
543                    return Some(Token::RParen);
544                }
545
546                '"' => {
547                    return Some(self.scan_string(start));
548                }
549
550                // this is a bit of a hack, but if we encounter a comment we clear out the indices
551                // iterator as the parser is line oriented.
552                ';' => self.indices = self.chars[0..0].char_indices().peekable(),
553
554                c if c.is_whitespace() => {}
555
556                c => return Some(self.scan_symbol(start, c == '|')),
557            }
558        }
559
560        None
561    }
562}
563
564#[cfg(test)]
565mod tests {
566    use super::{Arena, Parser, SExprData};
567    use crate::ContextBuilder;
568
569    #[test]
570    fn is_atom() {
571        let ctx = ContextBuilder::new().build().unwrap();
572        let pizza = ctx.atom("pizza");
573        assert!(pizza.is_atom());
574        assert!(!pizza.is_list());
575    }
576
577    #[test]
578    fn is_list() {
579        let ctx = ContextBuilder::new().build().unwrap();
580        let toppings = ctx.list(vec![
581            ctx.atom("tomato-sauce"),
582            ctx.atom("mozzarella"),
583            ctx.atom("basil"),
584        ]);
585        assert!(toppings.is_list());
586        assert!(!toppings.is_atom());
587    }
588
589    #[test]
590    fn parses_string_lit() {
591        let arena = Arena::new();
592        let mut p = Parser::new();
593
594        let expr = p.parse(&arena, "(error \"line 3 column 16: Parsing function declaration. Expecting sort list '(' got :a\")").expect("Parsing a list with a string literal");
595
596        assert!(expr.is_list());
597
598        let SExprData::List(es) = arena.get(expr) else {
599            panic!("Failed to parse a list");
600        };
601
602        assert_eq!(es.len(), 2);
603
604        assert!(es[0].is_atom());
605        assert!(es[1].is_string());
606
607        match arena.get(es[1]) {
608            SExprData::String(text) => assert_eq!(
609                text,
610                "line 3 column 16: Parsing function declaration. Expecting sort list '(' got :a"
611            ),
612            _ => unreachable!(),
613        };
614
615        let expr = p
616            .parse(&arena, "\"\"")
617            .expect("Parsing the empty string literal");
618
619        assert!(expr.is_string());
620        match arena.get(expr) {
621            SExprData::String(text) => assert!(text.is_empty()),
622            _ => unreachable!(),
623        }
624    }
625
626    #[test]
627    fn parse_error() {
628        let arena = Arena::new();
629        let mut p = Parser::new();
630
631        let err = p
632            .parse(&arena, "(error \"line)")
633            .expect_err("Unterminated string literal should fail to parse")
634            .expect_message();
635
636        assert_eq!(
637            err,
638            "Failed to find terminator for string literal at offset 7"
639        );
640    }
641
642    #[test]
643    fn parse_multi_line() {
644        let arena = Arena::new();
645        let mut p = Parser::new();
646
647        p.parse(&arena, "(open (extra \"sequence\")")
648            .expect_err("Open list should expect more")
649            .expect_more();
650
651        p.parse(&arena, "b")
652            .expect_err("Single atom doesn't close a list")
653            .expect_more();
654
655        let expr = p.parse(&arena, ")")
656            .expect("Closing paren should finish the parse");
657
658        let SExprData::List(es) = arena.get(expr) else {
659            panic!("Failed to parse a list");
660        };
661
662        assert_eq!(es.len(), 3);
663        assert!(es[0].is_atom());
664        assert!(es[1].is_list());
665        assert!(es[2].is_atom());
666    }
667}