air_parser/
symbols.rs

1use core::fmt;
2use core::mem;
3use core::ops::Deref;
4use core::str;
5
6use std::collections::BTreeMap;
7use std::sync::RwLock;
8
// The single, process-wide symbol table. Every `Symbol` operation in this module reaches it
// through `with_interner` / `with_read_only_interner`. `lazy_static!` defers the call to
// `SymbolTable::new()` until the static is first dereferenced.
lazy_static::lazy_static! {
    static ref SYMBOL_TABLE: SymbolTable = SymbolTable::new();
}
12
/// Symbols that are registered in the interner as soon as it is created.
///
/// Each constant wraps a fixed index, so it can be used without going through
/// `Symbol::intern`.
pub mod predefined {
    // The constants below are named like enum variants rather than in SCREAMING_SNAKE_CASE.
    #![allow(non_upper_case_globals)]
    use super::Symbol;

    /// The symbol `$main`
    pub const Main: Symbol = Symbol::new(0);
    /// The symbol `$aux`
    pub const Aux: Symbol = Symbol::new(1);
    /// The symbol `$builtin`
    pub const Builtin: Symbol = Symbol::new(2);
    /// The symbol `sum`
    pub const Sum: Symbol = Symbol::new(3);
    /// The symbol `prod`
    pub const Prod: Symbol = Symbol::new(4);

    // Table used by `Interner::new` to seed the interner. The interner pushes these strings
    // in order and later looks strings up by symbol index, so the position of every entry
    // in this slice must equal the index passed to `Symbol::new` above.
    pub(super) const __SYMBOLS: &[(Symbol, &str)] = &[
        (Main, "$main"),
        (Aux, "$aux"),
        (Builtin, "$builtin"),
        (Sum, "sum"),
        (Prod, "prod"),
    ];
}
36
37pub use self::predefined::*;
38
39struct SymbolTable {
40    interner: RwLock<Interner>,
41}
42impl SymbolTable {
43    pub fn new() -> Self {
44        Self {
45            interner: RwLock::new(Interner::new()),
46        }
47    }
48}
49unsafe impl Sync for SymbolTable {}
50
51/// A symbol is an interned string.
52#[derive(Clone, Copy, PartialEq, Eq, Hash)]
53pub struct Symbol(SymbolIndex);
54
55impl Symbol {
56    #[inline]
57    pub const fn new(n: u32) -> Self {
58        Self(SymbolIndex::new(n))
59    }
60
61    /// Maps a string to its interned representation.
62    pub fn intern<S: Into<String>>(string: S) -> Self {
63        let string = string.into();
64        with_interner(|interner| interner.intern(string))
65    }
66
67    pub fn as_str(self) -> &'static str {
68        with_read_only_interner(|interner| unsafe {
69            // This is safe because the interned string will live for the
70            // lifetime of the program
71            mem::transmute::<&str, &'static str>(interner.get(self))
72        })
73    }
74
75    #[inline]
76    pub fn as_u32(self) -> u32 {
77        self.0.as_u32()
78    }
79
80    #[inline]
81    pub fn as_usize(self) -> usize {
82        self.0.as_usize()
83    }
84}
85impl fmt::Debug for Symbol {
86    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
87        write!(f, "{}({:?})", self, self.0)
88    }
89}
90impl fmt::Display for Symbol {
91    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
92        fmt::Display::fmt(&self.as_str(), f)
93    }
94}
95impl PartialOrd for Symbol {
96    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
97        Some(self.cmp(other))
98    }
99}
100impl Ord for Symbol {
101    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
102        self.as_str().cmp(other.as_str())
103    }
104}
105impl<T: Deref<Target = str>> PartialEq<T> for Symbol {
106    fn eq(&self, other: &T) -> bool {
107        self.as_str() == other.deref()
108    }
109}
110
/// The raw numeric index wrapped by a `Symbol`.
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
struct SymbolIndex(u32);
impl SymbolIndex {
    /// Largest index that [`SymbolIndex::new`] accepts. The values above it are left
    /// unused to allow space for packing these indices into enums.
    pub const MAX_AS_U32: u32 = 0xFFFF_FF00;

    /// Wraps `n` as an index.
    ///
    /// # Panics
    ///
    /// Panics with "out of range value used" when `n` is greater than `MAX_AS_U32`.
    #[inline]
    const fn new(n: u32) -> Self {
        if n > Self::MAX_AS_U32 {
            panic!("out of range value used");
        }

        Self(n)
    }

    /// Returns the index as a `u32`.
    #[inline]
    pub fn as_u32(self) -> u32 {
        let Self(raw) = self;
        raw
    }

    /// Returns the index widened to a `usize`.
    #[inline]
    pub fn as_usize(self) -> usize {
        self.as_u32() as usize
    }
}
134impl From<SymbolIndex> for u32 {
135    #[inline]
136    fn from(v: SymbolIndex) -> u32 {
137        v.as_u32()
138    }
139}
140impl From<SymbolIndex> for usize {
141    #[inline]
142    fn from(v: SymbolIndex) -> usize {
143        v.as_usize()
144    }
145}
146
147#[derive(Default)]
148struct Interner {
149    pub names: BTreeMap<&'static str, Symbol>,
150    pub strings: Vec<&'static str>,
151}
152
153impl Interner {
154    pub fn new() -> Self {
155        let mut this = Interner::default();
156        for (sym, s) in predefined::__SYMBOLS {
157            this.names.insert(s, *sym);
158            this.strings.push(s);
159        }
160        this
161    }
162
163    pub fn intern(&mut self, string: String) -> Symbol {
164        if let Some(&name) = self.names.get(string.as_str()) {
165            return name;
166        }
167
168        let name = Symbol::new(self.strings.len() as u32);
169
170        let string = string.into_boxed_str();
171        let string: &'static str = Box::leak(string);
172        self.strings.push(string);
173        self.names.insert(string, name);
174        name
175    }
176
177    pub fn get(&self, symbol: Symbol) -> &str {
178        self.strings[symbol.0.as_usize()]
179    }
180}
181
182// If an interner exists, return it. Otherwise, prepare a fresh one.
183#[inline]
184fn with_interner<T, F: FnOnce(&mut Interner) -> T>(f: F) -> T {
185    let mut r = SYMBOL_TABLE.interner.write().unwrap();
186    f(&mut r)
187}
188
189#[inline]
190fn with_read_only_interner<T, F: FnOnce(&Interner) -> T>(f: F) -> T {
191    let r = SYMBOL_TABLE.interner.read().unwrap();
192    f(&r)
193}