mago_interner/lib.rs
1use std::collections::HashSet;
2use std::num::NonZeroUsize;
3use std::sync::Arc;
4
5use lasso::Key;
6use lasso::Rodeo;
7use lasso::ThreadedRodeo;
8use serde::Deserialize;
9use serde::Serialize;
10
/// A small, copyable handle that stands for an interned string.
///
/// The wrapped `usize` is the raw identifier value. The value `0` is the handle returned
/// by [`StringIdentifier::empty`] and is treated as the empty string by the interners in
/// this file; handles built through [`StringIdentifier::new`] always wrap a non-zero value.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash, Serialize, Deserialize, PartialOrd, Ord)]
#[repr(transparent)]
pub struct StringIdentifier(pub(crate) usize);
15
16impl StringIdentifier {
17 /// Creates a new empty `StringIdentifier`.
18 #[inline(always)]
19 pub const fn empty() -> Self {
20 Self(0)
21 }
22
23 /// Creates a new `StringIdentifier`.
24 ///
25 /// # Arguments
26 ///
27 /// * `val` - The value of the string identifier.
28 #[inline(always)]
29 pub const fn new(val: NonZeroUsize) -> Self {
30 Self(val.get())
31 }
32
33 /// Returns `true` if the string is empty.
34 #[inline(always)]
35 pub const fn is_empty(&self) -> bool {
36 self.0 == 0
37 }
38
39 /// Returns the value of the string identifier.
40 #[inline(always)]
41 pub const fn value(&self) -> usize {
42 self.0
43 }
44
45 /// Returns `true` if the string identifier is the same as the other.
46 #[inline(always)]
47 pub const fn is_same_as(&self, other: &Self) -> bool {
48 self.0 == other.0
49 }
50}
51
52unsafe impl Key for StringIdentifier {
53 #[inline(always)]
54 fn into_usize(self) -> usize {
55 self.0 - 1
56 }
57
58 #[inline(always)]
59 fn try_from_usize(int: usize) -> Option<Self> {
60 Some(Self::new(NonZeroUsize::new(int + 1)?))
61 }
62}
63
/// A string interner that stores strings and hands out a [`StringIdentifier`] for each.
///
/// Operations that may add a string take `&mut self`.
#[derive(Debug)]
pub struct Interner {
    /// The underlying `lasso` store, keyed by [`StringIdentifier`].
    rodeo: Rodeo<StringIdentifier>,
}
68
69/// A string interner that stores strings and assigns them unique identifiers.
70impl Interner {
71 /// Creates a new `Interner`.
72 pub fn new() -> Self {
73 Self { rodeo: Rodeo::new() }
74 }
75
76 /// Returns the number of strings stored in the interner.
77 #[inline]
78 pub fn len(&self) -> usize {
79 self.rodeo.len()
80 }
81
82 /// Returns `true` if the interner is empty.
83 #[inline]
84 pub fn is_empty(&self) -> bool {
85 self.rodeo.is_empty()
86 }
87
88 /// Returns the identifier for the specified interned string.
89 ///
90 /// # Arguments
91 ///
92 /// * string - The interned string.
93 #[inline]
94 pub fn get(&self, string: impl AsRef<str>) -> Option<StringIdentifier> {
95 let str = string.as_ref();
96 if str.is_empty() {
97 return Some(StringIdentifier::empty());
98 }
99
100 self.rodeo.get(str)
101 }
102
103 /// Interns the specified string, returning the identifier for it.
104 ///
105 /// If the string is already interned, the existing identifier is returned.
106 ///
107 /// # Arguments
108 ///
109 /// * string - The string to intern.
110 #[inline]
111 pub fn intern(&mut self, string: impl AsRef<str>) -> StringIdentifier {
112 let str = string.as_ref();
113 if str.is_empty() {
114 return StringIdentifier::empty();
115 }
116
117 self.rodeo.get_or_intern(str)
118 }
119
120 /// Interns a string if it has not already been interned, then returns a reference
121 /// to the interned string.
122 ///
123 /// # Arguments
124 ///
125 /// * `string` - A string or any type that implements `AsRef<str>`, representing the
126 /// string to intern.
127 ///
128 /// # Returns
129 ///
130 /// A reference to the interned version of the string.
131 ///
132 /// # Panics
133 ///
134 /// This method will panic if it encounters an invalid identifier. This should never
135 /// occur unless there is an issue with the identifier or the interner is used
136 /// incorrectly.
137 #[inline]
138 pub fn interned_str(&mut self, string: impl AsRef<str>) -> &str {
139 let str = string.as_ref();
140 if str.is_empty() {
141 return "";
142 }
143
144 let identifier = self.rodeo.get_or_intern(str);
145
146 self.rodeo.try_resolve(&identifier).expect(
147 "invalid string identifier; this should never happen unless the identifier is \
148 corrupted or the interner is used incorrectly",
149 )
150 }
151
152 /// Given an identifier, returns the identifier for the same string but with all
153 /// characters in lowercase.
154 ///
155 /// # Arguments
156 ///
157 /// * `identifier` - The identifier of the string to lower.
158 ///
159 /// # Returns
160 ///
161 /// The identifier of the string with all characters in lowercase.
162 #[inline]
163 pub fn lowered(&mut self, identifier: &StringIdentifier) -> StringIdentifier {
164 let string = self.lookup(identifier);
165
166 self.intern(string.to_ascii_lowercase())
167 }
168
169 /// Returns the interned string for the specified identifier.
170 ///
171 /// # Arguments
172 ///
173 /// * identifier - The identifier to look up.
174 ///
175 /// # Panics
176 ///
177 /// Panics if the identifier is invalid
178 #[inline]
179 pub fn lookup(&self, identifier: &StringIdentifier) -> &str {
180 if identifier.is_empty() {
181 return "";
182 }
183
184 self.rodeo.try_resolve(identifier).expect(
185 "invalid string identifier; this should never happen unless the identifier is \
186 corrupted or the interner is used incorrectly",
187 )
188 }
189}
190
/// A thread-safe interner, allowing multiple threads to concurrently intern strings.
///
/// The store is held behind an [`Arc`], so cloning a `ThreadedInterner` produces another
/// handle to the same underlying store rather than a copy of its contents.
#[derive(Debug, Clone)]
pub struct ThreadedInterner {
    /// The shared `lasso` store, keyed by [`StringIdentifier`].
    rodeo: Arc<ThreadedRodeo<StringIdentifier>>,
}
196
197impl ThreadedInterner {
198 /// Creates a new `ThreadedInterner`.
199 #[inline]
200 pub fn new() -> Self {
201 Self { rodeo: Arc::new(ThreadedRodeo::new()) }
202 }
203
204 /// Returns the number of strings stored in the interner.
205 #[inline]
206 pub fn len(&self) -> usize {
207 self.rodeo.len()
208 }
209
210 /// Returns `true` if the interner is empty.
211 #[inline]
212 pub fn is_empty(&self) -> bool {
213 self.rodeo.is_empty()
214 }
215
216 /// Interns a string and returns its identifier.
217 ///
218 /// If the string is already interned, the existing identifier is returned.
219 ///
220 /// # Arguments
221 ///
222 /// * `string` - The string to intern.
223 #[inline]
224 pub fn intern(&self, string: impl AsRef<str>) -> StringIdentifier {
225 let str = string.as_ref();
226 if str.is_empty() {
227 return StringIdentifier::empty();
228 }
229
230 self.rodeo.get_or_intern(str)
231 }
232
233 /// Interns a string if it has not already been interned, then returns a reference
234 /// to the interned string.
235 ///
236 /// # Arguments
237 ///
238 /// * `string` - A string or any type that implements `AsRef<str>`, representing the
239 /// string to intern.
240 ///
241 /// # Returns
242 ///
243 /// A reference to the interned version of the string.
244 ///
245 /// # Panics
246 ///
247 /// This method will panic if it encounters an invalid identifier. This should never
248 /// occur unless there is an issue with the identifier or the interner is used
249 /// incorrectly.
250 #[inline]
251 pub fn interned_str(&self, string: impl AsRef<str>) -> &str {
252 let str = string.as_ref();
253 if str.is_empty() {
254 return "";
255 }
256
257 let identifier = self.rodeo.get_or_intern(str);
258
259 self.rodeo.try_resolve(&identifier).expect(
260 "invalid string identifier; this should never happen unless the identifier is \
261 corrupted or the interner is used incorrectly",
262 )
263 }
264
265 /// Given an identifier, returns the identifier for the same string but with all
266 /// characters in lowercase.
267 ///
268 /// # Arguments
269 ///
270 /// * `identifier` - The identifier of the string to lower.
271 ///
272 /// # Returns
273 ///
274 /// The identifier of the string with all characters in lowercase.
275 #[inline]
276 pub fn lowered(&self, identifier: &StringIdentifier) -> StringIdentifier {
277 let string = self.lookup(identifier);
278
279 self.intern(string.to_ascii_lowercase())
280 }
281
282 /// Looks up an interned string by its identifier.
283 ///
284 /// # Arguments
285 ///
286 /// * `identifier` - The identifier of the interned string to look up.
287 ///
288 /// # Panics
289 ///
290 /// This method will panic if it encounters an invalid identifier. This should never
291 /// occur unless there is an issue with the identifier or the interner is used
292 /// incorrectly.
293 #[inline]
294 pub fn lookup(&self, identifier: &StringIdentifier) -> &str {
295 if identifier.is_empty() {
296 return "";
297 }
298
299 self.rodeo.try_resolve(identifier).expect(
300 "invalid string identifier; this should never happen unless the identifier is \
301 corrupted or the interner is used incorrectly",
302 )
303 }
304
305 /// Returns all interned strings and their identifiers as a hashmap.
306 #[inline]
307 pub fn all(&self) -> HashSet<(StringIdentifier, &str)> {
308 self.rodeo.iter().collect()
309 }
310}
311
312impl std::fmt::Display for StringIdentifier {
313 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
314 write!(f, "string-identifier({})", self.0)
315 }
316}
317
// SAFETY / NOTE(review): `ThreadedInterner`'s only field is an
// `Arc<ThreadedRodeo<StringIdentifier>>`. These impls presumably rely on `ThreadedRodeo`
// being safe to send to and share between threads — TODO confirm against the `lasso`
// documentation. If `Arc<ThreadedRodeo<StringIdentifier>>` is already `Send + Sync`, the
// compiler derives both auto traits on its own and these manual impls are redundant.
unsafe impl Send for ThreadedInterner {}
unsafe impl Sync for ThreadedInterner {}
320
321impl std::default::Default for Interner {
322 #[inline]
323 fn default() -> Self {
324 Self::new()
325 }
326}
327
328impl std::default::Default for ThreadedInterner {
329 #[inline]
330 fn default() -> Self {
331 Self::new()
332 }
333}