sphinx/runtime/strings/
intern.rs

1use core::fmt;
2use core::cmp;
3use core::cell::RefCell;
4use core::marker::PhantomData;
5use core::hash::{Hash, Hasher, BuildHasher};
6use string_interner::{self, DefaultBackend};
7use string_interner::symbol::Symbol;
8
9use crate::language::InternSymbol;
10use crate::runtime::DefaultBuildHasher;
11
12
13thread_local! {
14    pub static STRING_TABLE: RefCell<StringTable> = RefCell::new(StringTable::new());
15}
16
/// Helper macro for static interned strings.
///
/// Each expansion declares its own thread-local `SYMBOL`, initialised with
/// `StringSymbol::from($str)`, and evaluates to a copy of that symbol.
///
/// The type is named through `$crate` so the expansion always refers to the
/// `StringSymbol` of the crate that defines this macro, no matter which crate
/// invokes it (the macro is `#[macro_export]`ed).
#[macro_export]
macro_rules! static_symbol {
    ($str:expr) => {
        {
            type StringSymbol = $crate::runtime::strings::StringSymbol;
            thread_local! {
                static SYMBOL: StringSymbol = StringSymbol::from($str);
            }
            SYMBOL.with(|symbol| *symbol)
        }
    };
}
30
31pub use static_symbol;
32
33// Interned Strings
34
35type PhantomUnsend = PhantomData<*mut ()>;
36
37#[derive(Clone, Copy, PartialEq, Eq, Hash)]
38pub struct StringSymbol(InternSymbol, PhantomUnsend);
39
40// Not Send because we depend on the thread-local string table.
41// (We can Send strings with some extra work, just not StringSymbols)
42// impl !Send for StringSymbol { }
43
44impl StringSymbol {
45    fn as_usize(&self) -> usize {
46        self.0.to_usize()
47    }
48    
49    /// Interns a string slice, creating a `StringSymbol`
50    pub fn intern(string: &str) -> Self {
51        STRING_TABLE.with(|string_table| string_table.borrow_mut().get_or_intern(string))
52    }
53    
54    pub fn write(&self, buf: &mut impl fmt::Write) -> fmt::Result {
55        STRING_TABLE.with(|string_table| buf.write_str(
56            string_table.borrow().resolve(self)
57        ))
58    }
59}
60
61
// Deref is intentionally not implemented for StringSymbol: resolving a symbol has to borrow the
// thread-local string table, and that cost should stay visible at the call site.
63
64impl From<StringSymbol> for InternSymbol {
65    fn from(intern: StringSymbol) -> Self {
66        intern.0
67    }
68}
69
70impl From<InternSymbol> for StringSymbol {
71    fn from(symbol: InternSymbol) -> Self {
72        Self(symbol, PhantomData)
73    }
74}
75
76impl From<&str> for StringSymbol {
77    fn from(string: &str) -> Self {
78        Self::intern(string)
79    }
80}
81
82// Lexicographical ordering of strings
83impl PartialOrd for StringSymbol {
84    fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
85        STRING_TABLE.with(|string_table| {
86            let string_table = string_table.borrow();
87            
88            <str as PartialOrd>::partial_cmp(
89                string_table.resolve(self),
90                string_table.resolve(other),
91            )
92        })
93
94    }
95}
96
97impl Ord for StringSymbol {
98    fn cmp(&self, other: &Self) -> cmp::Ordering {
99        STRING_TABLE.with(|string_table| {
100            let string_table = string_table.borrow();
101            
102            <str as Ord>::cmp(
103                string_table.resolve(self),
104                string_table.resolve(other),
105            )
106        })
107    }
108}
109
110impl fmt::Debug for StringSymbol {
111    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
112        fmt.debug_tuple("StringSymbol")
113            .field(&self.as_usize())
114            .finish()
115    }
116}
117
118impl fmt::Display for StringSymbol {
119    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
120        self.write(fmt)
121    }
122}
123
124
125type InternBackend = DefaultBackend<InternSymbol>;
126
127// StringInterner is used for storage of strings in code units during compilation,
128// StringTable is used for string symbol lookups at runtime
129pub type StringInterner = string_interner::StringInterner<InternBackend, DefaultBuildHasher>;
130
131pub type StringBuildHasher = DefaultBuildHasher;
132
133#[derive(Clone)]
134pub struct StringTable {
135    interner: StringInterner,
136    hasher_factory: StringBuildHasher,
137    hashes: Vec<u64>,  // hash cache
138}
139
140impl Default for StringTable {
141    fn default() -> Self { Self::new() }
142}
143
144impl StringTable {
145    pub fn new() -> Self {
146        StringTable {
147            interner: StringInterner::new(),
148            hasher_factory: StringBuildHasher::default(),
149            hashes: Vec::new(),
150        }
151    }
152    
153    pub fn hasher(&self) -> &impl BuildHasher {
154        &self.hasher_factory
155    }
156    
157    pub fn hash_str(&self, string: &str) -> u64 {
158        let mut state = self.hasher_factory.build_hasher();
159        string.hash(&mut state);
160        state.finish()
161    }
162    
163    pub fn get(&self, string: &str) -> Option<StringSymbol> {
164        self.interner.get(string).map(|symbol| symbol.into())
165    }
166    
167    pub fn get_or_intern(&mut self, string: &str) -> StringSymbol {
168        let symbol = self.interner.get_or_intern(string);
169        
170        // this works because symbols are generated with contiguous values
171        debug_assert!(symbol.to_usize() <= self.hashes.len());
172        if symbol.to_usize() == self.hashes.len() {
173            self.hashes.push(self.hash_str(string))
174        }
175        
176        symbol.into()
177    }
178    
179    pub fn resolve(&self, symbol: &StringSymbol) -> &str {
180        let symbol = InternSymbol::from(*symbol);
181        self.interner.resolve(symbol).expect("invalid symbol")
182    }
183    
184    pub fn lookup_hash(&self, symbol: &StringSymbol) -> u64 {
185        *self.hashes.get(symbol.as_usize()).expect("invalid symbol")
186    }
187    
188    // pub fn into_iter(&self) -> impl Iterator<Item=(StringSymbol, &str)> {
189    //     self.interner.into_iter().map(|(symbol, string)| (symbol.into(), string))
190    // }
191    
192}
193
194impl<'s> Extend<&'s str> for StringTable {
195    fn extend<T>(&mut self, iter: T) where T: IntoIterator<Item=&'s str> {
196        for string in iter.into_iter() {
197            self.get_or_intern(string);
198        }
199    }
200}