air_parser/
symbols.rs

1use core::fmt;
2use core::mem;
3use core::ops::Deref;
4use core::str;
5
6use std::collections::BTreeMap;
7use std::sync::RwLock;
8
// Process-wide symbol table that backs every `Symbol`. `lazy_static!` defers
// the call to `SymbolTable::new()` until the static is first dereferenced.
lazy_static::lazy_static! {
    static ref SYMBOL_TABLE: SymbolTable = SymbolTable::new();
}
12
/// Symbols with fixed, hard-coded indices that are registered in the interner
/// when it is constructed.
pub mod predefined {
    // The constants below are CamelCase rather than SCREAMING_SNAKE_CASE,
    // which would otherwise trigger this lint.
    #![allow(non_upper_case_globals)]
    use super::Symbol;

    /// The symbol `$main`
    pub const Main: Symbol = Symbol::new(0);
    /// The symbol `$builtin`
    pub const Builtin: Symbol = Symbol::new(1);
    /// The symbol `sum`
    pub const Sum: Symbol = Symbol::new(2);
    /// The symbol `prod`
    pub const Prod: Symbol = Symbol::new(3);

    /// Pairs every predefined symbol with the string it stands for.
    ///
    /// `Interner::new` walks this table in order and pushes each string onto
    /// a vector that is later indexed by symbol index, so the entry at
    /// position `i` must be the symbol whose index is `i`.
    pub(super) const __SYMBOLS: &[(Symbol, &str)] = &[
        (Main, "$main"),
        (Builtin, "$builtin"),
        (Sum, "sum"),
        (Prod, "prod"),
    ];
}
33
34pub use self::predefined::*;
35
36struct SymbolTable {
37    interner: RwLock<Interner>,
38}
39impl SymbolTable {
40    pub fn new() -> Self {
41        Self {
42            interner: RwLock::new(Interner::new()),
43        }
44    }
45}
46unsafe impl Sync for SymbolTable {}
47
48/// A symbol is an interned string.
49#[derive(Clone, Copy, PartialEq, Eq, Hash)]
50pub struct Symbol(SymbolIndex);
51
52impl Symbol {
53    #[inline]
54    pub const fn new(n: u32) -> Self {
55        Self(SymbolIndex::new(n))
56    }
57
58    /// Maps a string to its interned representation.
59    pub fn intern<S: Into<String>>(string: S) -> Self {
60        let string = string.into();
61        with_interner(|interner| interner.intern(string))
62    }
63
64    pub fn as_str(self) -> &'static str {
65        with_read_only_interner(|interner| unsafe {
66            // This is safe because the interned string will live for the
67            // lifetime of the program
68            mem::transmute::<&str, &'static str>(interner.get(self))
69        })
70    }
71
72    #[inline]
73    pub fn as_u32(self) -> u32 {
74        self.0.as_u32()
75    }
76
77    #[inline]
78    pub fn as_usize(self) -> usize {
79        self.0.as_usize()
80    }
81}
82impl fmt::Debug for Symbol {
83    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
84        write!(f, "{}({:?})", self, self.0)
85    }
86}
87impl fmt::Display for Symbol {
88    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
89        fmt::Display::fmt(&self.as_str(), f)
90    }
91}
92impl PartialOrd for Symbol {
93    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
94        Some(self.cmp(other))
95    }
96}
97impl Ord for Symbol {
98    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
99        self.as_str().cmp(other.as_str())
100    }
101}
102impl<T: Deref<Target = str>> PartialEq<T> for Symbol {
103    fn eq(&self, other: &T) -> bool {
104        self.as_str() == other.deref()
105    }
106}
107
/// Raw 32-bit index wrapped by a symbol.
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
struct SymbolIndex(u32);
impl SymbolIndex {
    /// Largest permitted raw value.
    // shave off 256 indices at the end to allow space for packing these indices into enums
    pub const MAX_AS_U32: u32 = 0xFFFF_FF00;

    /// Wraps `n`.
    ///
    /// # Panics
    ///
    /// Panics if `n` is greater than [`Self::MAX_AS_U32`].
    #[inline]
    const fn new(n: u32) -> Self {
        assert!(n <= Self::MAX_AS_U32, "out of range value used");
        Self(n)
    }

    /// Returns the wrapped value.
    #[inline]
    pub fn as_u32(self) -> u32 {
        let Self(raw) = self;
        raw
    }

    /// Returns the wrapped value widened with an `as` cast to `usize`.
    #[inline]
    pub fn as_usize(self) -> usize {
        self.as_u32() as usize
    }
}
impl From<SymbolIndex> for u32 {
    /// Unwraps the index into its raw `u32`.
    #[inline]
    fn from(index: SymbolIndex) -> Self {
        index.as_u32()
    }
}
impl From<SymbolIndex> for usize {
    /// Unwraps the index into a `usize`.
    #[inline]
    fn from(index: SymbolIndex) -> Self {
        index.as_usize()
    }
}
143
144#[derive(Default)]
145struct Interner {
146    pub names: BTreeMap<&'static str, Symbol>,
147    pub strings: Vec<&'static str>,
148}
149
150impl Interner {
151    pub fn new() -> Self {
152        let mut this = Interner::default();
153        for (sym, s) in predefined::__SYMBOLS {
154            this.names.insert(s, *sym);
155            this.strings.push(s);
156        }
157        this
158    }
159
160    pub fn intern(&mut self, string: String) -> Symbol {
161        if let Some(&name) = self.names.get(string.as_str()) {
162            return name;
163        }
164
165        let name = Symbol::new(self.strings.len() as u32);
166
167        let string = string.into_boxed_str();
168        let string: &'static str = Box::leak(string);
169        self.strings.push(string);
170        self.names.insert(string, name);
171        name
172    }
173
174    pub fn get(&self, symbol: Symbol) -> &str {
175        self.strings[symbol.0.as_usize()]
176    }
177}
178
179// If an interner exists, return it. Otherwise, prepare a fresh one.
180#[inline]
181fn with_interner<T, F: FnOnce(&mut Interner) -> T>(f: F) -> T {
182    let mut r = SYMBOL_TABLE.interner.write().unwrap();
183    f(&mut r)
184}
185
186#[inline]
187fn with_read_only_interner<T, F: FnOnce(&Interner) -> T>(f: F) -> T {
188    let r = SYMBOL_TABLE.interner.read().unwrap();
189    f(&r)
190}