mago_interner/lib.rs
1use std::collections::HashSet;
2use std::num::NonZeroUsize;
3use std::sync::Arc;
4
5use lasso::Key;
6use lasso::Rodeo;
7use lasso::ThreadedRodeo;
8use serde::Deserialize;
9use serde::Serialize;
10
11/// An string identifier that is used to represent an interned string.
12#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash, Serialize, Deserialize, PartialOrd, Ord)]
13#[repr(transparent)]
14pub struct StringIdentifier(pub(crate) usize);
15
16impl StringIdentifier {
17 /// Creates a new empty `StringIdentifier`.
18 #[inline(always)]
19 pub const fn empty() -> Self {
20 Self(0)
21 }
22
23 /// Creates a new `StringIdentifier`.
24 ///
25 /// # Arguments
26 ///
27 /// * `val` - The value of the string identifier.
28 #[inline(always)]
29 pub const fn new(val: NonZeroUsize) -> Self {
30 Self(val.get())
31 }
32
33 /// Returns `true` if the string is empty.
34 #[inline(always)]
35 pub const fn is_empty(&self) -> bool {
36 self.0 == 0
37 }
38
39 /// Returns the value of the string identifier.
40 #[inline(always)]
41 pub const fn value(&self) -> usize {
42 self.0
43 }
44
45 /// Returns `true` if the string identifier is the same as the other.
46 #[inline(always)]
47 pub const fn is_same_as(&self, other: &Self) -> bool {
48 self.0 == other.0
49 }
50}
51
52unsafe impl Key for StringIdentifier {
53 #[inline(always)]
54 fn into_usize(self) -> usize {
55 self.0 - 1
56 }
57
58 #[inline(always)]
59 fn try_from_usize(int: usize) -> Option<Self> {
60 Some(Self::new(NonZeroUsize::new(int + 1)?))
61 }
62}
63
64#[derive(Debug)]
65pub struct Interner {
66 rodeo: Rodeo<StringIdentifier>,
67}
68
69/// A string interner that stores strings and assigns them unique identifiers.
70impl Interner {
71 /// Creates a new `Interner`.
72 pub fn new() -> Self {
73 Self { rodeo: Rodeo::new() }
74 }
75
76 /// Returns the number of strings stored in the interner.
77 #[inline]
78 pub fn len(&self) -> usize {
79 self.rodeo.len()
80 }
81
82 /// Returns `true` if the interner is empty.
83 #[inline]
84 pub fn is_empty(&self) -> bool {
85 self.rodeo.is_empty()
86 }
87
88 /// Returns the identifier for the specified interned string.
89 ///
90 /// # Arguments
91 ///
92 /// * string - The interned string.
93 ///
94 /// # Returns
95 ///
96 /// The identifier for the interned string, or `None` if the string is not interned.
97 #[inline]
98 pub fn get(&self, string: impl AsRef<str>) -> Option<StringIdentifier> {
99 let str = string.as_ref();
100 if str.is_empty() {
101 return Some(StringIdentifier::empty());
102 }
103
104 self.rodeo.get(str)
105 }
106
107 /// Interns the specified string, returning the identifier for it.
108 ///
109 /// If the string is already interned, the existing identifier is returned.
110 ///
111 /// # Arguments
112 ///
113 /// * string - The string to intern.
114 #[inline]
115 pub fn intern(&mut self, string: impl AsRef<str>) -> StringIdentifier {
116 let str = string.as_ref();
117 if str.is_empty() {
118 return StringIdentifier::empty();
119 }
120
121 self.rodeo.get_or_intern(str)
122 }
123
124 /// Interns a string if it has not already been interned, then returns a reference
125 /// to the interned string.
126 ///
127 /// # Arguments
128 ///
129 /// * `string` - A string or any type that implements `AsRef<str>`, representing the
130 /// string to intern.
131 ///
132 /// # Returns
133 ///
134 /// A reference to the interned version of the string.
135 ///
136 /// # Panics
137 ///
138 /// This method will panic if it encounters an invalid identifier. This should never
139 /// occur unless there is an issue with the identifier or the interner is used
140 /// incorrectly.
141 #[inline]
142 pub fn interned_str(&mut self, string: impl AsRef<str>) -> &str {
143 let str = string.as_ref();
144 if str.is_empty() {
145 return "";
146 }
147
148 let identifier = self.rodeo.get_or_intern(str);
149
150 self.rodeo.try_resolve(&identifier).expect(
151 "invalid string identifier; this should never happen unless the identifier is \
152 corrupted or the interner is used incorrectly",
153 )
154 }
155
156 /// Given an identifier, returns the identifier for the same string but with all
157 /// characters in lowercase.
158 ///
159 /// # Arguments
160 ///
161 /// * `identifier` - The identifier of the string to lower.
162 ///
163 /// # Returns
164 ///
165 /// The identifier of the string with all characters in lowercase.
166 #[inline]
167 pub fn lowered(&mut self, identifier: &StringIdentifier) -> StringIdentifier {
168 let string = self.lookup(identifier);
169
170 self.intern(string.to_ascii_lowercase())
171 }
172
173 /// Returns the interned string for the specified identifier.
174 ///
175 /// # Arguments
176 ///
177 /// * identifier - The identifier to look up.
178 ///
179 /// # Panics
180 ///
181 /// Panics if the identifier is invalid
182 #[inline]
183 pub fn lookup(&self, identifier: &StringIdentifier) -> &str {
184 if identifier.is_empty() {
185 return "";
186 }
187
188 self.rodeo.try_resolve(identifier).expect(
189 "invalid string identifier; this should never happen unless the identifier is \
190 corrupted or the interner is used incorrectly",
191 )
192 }
193}
194
195/// A thread-safe interner, allowing multiple threads to concurrently intern strings.
196#[derive(Debug, Clone)]
197pub struct ThreadedInterner {
198 rodeo: Arc<ThreadedRodeo<StringIdentifier>>,
199}
200
201impl ThreadedInterner {
202 /// Creates a new `ThreadedInterner`.
203 #[inline]
204 pub fn new() -> Self {
205 Self { rodeo: Arc::new(ThreadedRodeo::new()) }
206 }
207
208 /// Returns the number of strings stored in the interner.
209 #[inline]
210 pub fn len(&self) -> usize {
211 self.rodeo.len()
212 }
213
214 /// Returns `true` if the interner is empty.
215 #[inline]
216 pub fn is_empty(&self) -> bool {
217 self.rodeo.is_empty()
218 }
219
220 /// Returns the identifier for the specified interned string.
221 ///
222 /// # Arguments
223 ///
224 /// * `string` - The interned string.
225 ///
226 /// # Returns
227 ///
228 /// The identifier for the interned string, or `None` if the string is not interned.
229 pub fn get(&self, string: impl AsRef<str>) -> Option<StringIdentifier> {
230 let str = string.as_ref();
231 if str.is_empty() {
232 return Some(StringIdentifier::empty());
233 }
234
235 self.rodeo.get(str)
236 }
237
238 /// Interns a string and returns its identifier.
239 ///
240 /// If the string is already interned, the existing identifier is returned.
241 ///
242 /// # Arguments
243 ///
244 /// * `string` - The string to intern.
245 #[inline]
246 pub fn intern(&self, string: impl AsRef<str>) -> StringIdentifier {
247 let str = string.as_ref();
248 if str.is_empty() {
249 return StringIdentifier::empty();
250 }
251
252 self.rodeo.get_or_intern(str)
253 }
254
255 /// Interns a string if it has not already been interned, then returns a reference
256 /// to the interned string.
257 ///
258 /// # Arguments
259 ///
260 /// * `string` - A string or any type that implements `AsRef<str>`, representing the
261 /// string to intern.
262 ///
263 /// # Returns
264 ///
265 /// A reference to the interned version of the string.
266 ///
267 /// # Panics
268 ///
269 /// This method will panic if it encounters an invalid identifier. This should never
270 /// occur unless there is an issue with the identifier or the interner is used
271 /// incorrectly.
272 #[inline]
273 pub fn interned_str(&self, string: impl AsRef<str>) -> &str {
274 let str = string.as_ref();
275 if str.is_empty() {
276 return "";
277 }
278
279 let identifier = self.rodeo.get_or_intern(str);
280
281 self.rodeo.try_resolve(&identifier).expect(
282 "invalid string identifier; this should never happen unless the identifier is \
283 corrupted or the interner is used incorrectly",
284 )
285 }
286
287 /// Given an identifier, returns the identifier for the same string but with all
288 /// characters in lowercase.
289 ///
290 /// # Arguments
291 ///
292 /// * `identifier` - The identifier of the string to lower.
293 ///
294 /// # Returns
295 ///
296 /// The identifier of the string with all characters in lowercase.
297 #[inline]
298 pub fn lowered(&self, identifier: &StringIdentifier) -> StringIdentifier {
299 let string = self.lookup(identifier);
300
301 self.intern(string.to_ascii_lowercase())
302 }
303
304 /// Looks up an interned string by its identifier.
305 ///
306 /// # Arguments
307 ///
308 /// * `identifier` - The identifier of the interned string to look up.
309 ///
310 /// # Panics
311 ///
312 /// This method will panic if it encounters an invalid identifier. This should never
313 /// occur unless there is an issue with the identifier or the interner is used
314 /// incorrectly.
315 #[inline]
316 pub fn lookup(&self, identifier: &StringIdentifier) -> &str {
317 if identifier.is_empty() {
318 return "";
319 }
320
321 self.rodeo.try_resolve(identifier).expect(
322 "invalid string identifier; this should never happen unless the identifier is \
323 corrupted or the interner is used incorrectly",
324 )
325 }
326
327 /// Returns all interned strings and their identifiers as a hashmap.
328 #[inline]
329 pub fn all(&self) -> HashSet<(StringIdentifier, &str)> {
330 self.rodeo.iter().collect()
331 }
332}
333
334impl std::fmt::Display for StringIdentifier {
335 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
336 write!(f, "string-identifier({})", self.0)
337 }
338}
339
// SAFETY(review): `ThreadedInterner`'s only field is an
// `Arc<ThreadedRodeo<StringIdentifier>>`; these impls assert that moving and
// sharing it across threads is sound. Presumably `ThreadedRodeo` is itself
// `Send + Sync`, which would make these manual impls redundant — TODO confirm
// against the `lasso` documentation before relying on or removing them.
unsafe impl Send for ThreadedInterner {}
unsafe impl Sync for ThreadedInterner {}
342
343impl std::default::Default for Interner {
344 #[inline]
345 fn default() -> Self {
346 Self::new()
347 }
348}
349
350impl std::default::Default for ThreadedInterner {
351 #[inline]
352 fn default() -> Self {
353 Self::new()
354 }
355}