mago_interner/
lib.rs

1use std::collections::HashSet;
2use std::num::NonZeroUsize;
3use std::sync::Arc;
4
5use lasso::Key;
6use lasso::Rodeo;
7use lasso::ThreadedRodeo;
8use serde::Deserialize;
9use serde::Serialize;
10
11/// An string identifier that is used to represent an interned string.
12#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash, Serialize, Deserialize, PartialOrd, Ord)]
13#[repr(transparent)]
14pub struct StringIdentifier(pub(crate) usize);
15
16impl StringIdentifier {
17    /// Creates a new empty `StringIdentifier`.
18    #[inline(always)]
19    pub const fn empty() -> Self {
20        Self(0)
21    }
22
23    /// Creates a new `StringIdentifier`.
24    ///
25    /// # Arguments
26    ///
27    /// * `val` - The value of the string identifier.
28    #[inline(always)]
29    pub const fn new(val: NonZeroUsize) -> Self {
30        Self(val.get())
31    }
32
33    /// Returns `true` if the string is empty.
34    #[inline(always)]
35    pub const fn is_empty(&self) -> bool {
36        self.0 == 0
37    }
38
39    /// Returns the value of the string identifier.
40    #[inline(always)]
41    pub const fn value(&self) -> usize {
42        self.0
43    }
44
45    /// Returns `true` if the string identifier is the same as the other.
46    #[inline(always)]
47    pub const fn is_same_as(&self, other: &Self) -> bool {
48        self.0 == other.0
49    }
50}
51
52unsafe impl Key for StringIdentifier {
53    #[inline(always)]
54    fn into_usize(self) -> usize {
55        self.0 - 1
56    }
57
58    #[inline(always)]
59    fn try_from_usize(int: usize) -> Option<Self> {
60        Some(Self::new(NonZeroUsize::new(int + 1)?))
61    }
62}
63
64#[derive(Debug)]
65pub struct Interner {
66    rodeo: Rodeo<StringIdentifier>,
67}
68
69/// A string interner that stores strings and assigns them unique identifiers.
70impl Interner {
71    /// Creates a new `Interner`.
72    pub fn new() -> Self {
73        Self { rodeo: Rodeo::new() }
74    }
75
76    /// Returns the number of strings stored in the interner.
77    #[inline]
78    pub fn len(&self) -> usize {
79        self.rodeo.len()
80    }
81
82    /// Returns `true` if the interner is empty.
83    #[inline]
84    pub fn is_empty(&self) -> bool {
85        self.rodeo.is_empty()
86    }
87
88    /// Returns the identifier for the specified interned string.
89    ///
90    /// # Arguments
91    ///
92    /// * string - The interned string.
93    ///
94    /// # Returns
95    ///
96    /// The identifier for the interned string, or `None` if the string is not interned.
97    #[inline]
98    pub fn get(&self, string: impl AsRef<str>) -> Option<StringIdentifier> {
99        let str = string.as_ref();
100        if str.is_empty() {
101            return Some(StringIdentifier::empty());
102        }
103
104        self.rodeo.get(str)
105    }
106
107    /// Interns the specified string, returning the identifier for it.
108    ///
109    /// If the string is already interned, the existing identifier is returned.
110    ///
111    /// # Arguments
112    ///
113    /// * string - The string to intern.
114    #[inline]
115    pub fn intern(&mut self, string: impl AsRef<str>) -> StringIdentifier {
116        let str = string.as_ref();
117        if str.is_empty() {
118            return StringIdentifier::empty();
119        }
120
121        self.rodeo.get_or_intern(str)
122    }
123
124    /// Interns a string if it has not already been interned, then returns a reference
125    /// to the interned string.
126    ///
127    /// # Arguments
128    ///
129    /// * `string` - A string or any type that implements `AsRef<str>`, representing the
130    ///   string to intern.
131    ///
132    /// # Returns
133    ///
134    /// A reference to the interned version of the string.
135    ///
136    /// # Panics
137    ///
138    /// This method will panic if it encounters an invalid identifier. This should never
139    /// occur unless there is an issue with the identifier or the interner is used
140    /// incorrectly.
141    #[inline]
142    pub fn interned_str(&mut self, string: impl AsRef<str>) -> &str {
143        let str = string.as_ref();
144        if str.is_empty() {
145            return "";
146        }
147
148        let identifier = self.rodeo.get_or_intern(str);
149
150        self.rodeo.try_resolve(&identifier).expect(
151            "invalid string identifier; this should never happen unless the identifier is \
152                corrupted or the interner is used incorrectly",
153        )
154    }
155
156    /// Given an identifier, returns the identifier for the same string but with all
157    /// characters in lowercase.
158    ///
159    /// # Arguments
160    ///
161    /// * `identifier` - The identifier of the string to lower.
162    ///
163    /// # Returns
164    ///
165    /// The identifier of the string with all characters in lowercase.
166    #[inline]
167    pub fn lowered(&mut self, identifier: &StringIdentifier) -> StringIdentifier {
168        let string = self.lookup(identifier);
169
170        self.intern(string.to_ascii_lowercase())
171    }
172
173    /// Returns the interned string for the specified identifier.
174    ///
175    /// # Arguments
176    ///
177    /// * identifier - The identifier to look up.
178    ///
179    /// # Panics
180    ///
181    /// Panics if the identifier is invalid
182    #[inline]
183    pub fn lookup(&self, identifier: &StringIdentifier) -> &str {
184        if identifier.is_empty() {
185            return "";
186        }
187
188        self.rodeo.try_resolve(identifier).expect(
189            "invalid string identifier; this should never happen unless the identifier is \
190                corrupted or the interner is used incorrectly",
191        )
192    }
193}
194
195/// A thread-safe interner, allowing multiple threads to concurrently intern strings.
196#[derive(Debug, Clone)]
197pub struct ThreadedInterner {
198    rodeo: Arc<ThreadedRodeo<StringIdentifier>>,
199}
200
201impl ThreadedInterner {
202    /// Creates a new `ThreadedInterner`.
203    #[inline]
204    pub fn new() -> Self {
205        Self { rodeo: Arc::new(ThreadedRodeo::new()) }
206    }
207
208    /// Returns the number of strings stored in the interner.
209    #[inline]
210    pub fn len(&self) -> usize {
211        self.rodeo.len()
212    }
213
214    /// Returns `true` if the interner is empty.
215    #[inline]
216    pub fn is_empty(&self) -> bool {
217        self.rodeo.is_empty()
218    }
219
220    /// Returns the identifier for the specified interned string.
221    ///
222    /// # Arguments
223    ///
224    /// * `string` - The interned string.
225    ///
226    /// # Returns
227    ///
228    /// The identifier for the interned string, or `None` if the string is not interned.
229    pub fn get(&self, string: impl AsRef<str>) -> Option<StringIdentifier> {
230        let str = string.as_ref();
231        if str.is_empty() {
232            return Some(StringIdentifier::empty());
233        }
234
235        self.rodeo.get(str)
236    }
237
238    /// Interns a string and returns its identifier.
239    ///
240    /// If the string is already interned, the existing identifier is returned.
241    ///
242    /// # Arguments
243    ///
244    /// * `string` - The string to intern.
245    #[inline]
246    pub fn intern(&self, string: impl AsRef<str>) -> StringIdentifier {
247        let str = string.as_ref();
248        if str.is_empty() {
249            return StringIdentifier::empty();
250        }
251
252        self.rodeo.get_or_intern(str)
253    }
254
255    /// Interns a string if it has not already been interned, then returns a reference
256    /// to the interned string.
257    ///
258    /// # Arguments
259    ///
260    /// * `string` - A string or any type that implements `AsRef<str>`, representing the
261    ///   string to intern.
262    ///
263    /// # Returns
264    ///
265    /// A reference to the interned version of the string.
266    ///
267    /// # Panics
268    ///
269    /// This method will panic if it encounters an invalid identifier. This should never
270    /// occur unless there is an issue with the identifier or the interner is used
271    /// incorrectly.
272    #[inline]
273    pub fn interned_str(&self, string: impl AsRef<str>) -> &str {
274        let str = string.as_ref();
275        if str.is_empty() {
276            return "";
277        }
278
279        let identifier = self.rodeo.get_or_intern(str);
280
281        self.rodeo.try_resolve(&identifier).expect(
282            "invalid string identifier; this should never happen unless the identifier is \
283                corrupted or the interner is used incorrectly",
284        )
285    }
286
287    /// Given an identifier, returns the identifier for the same string but with all
288    /// characters in lowercase.
289    ///
290    /// # Arguments
291    ///
292    /// * `identifier` - The identifier of the string to lower.
293    ///
294    /// # Returns
295    ///
296    /// The identifier of the string with all characters in lowercase.
297    #[inline]
298    pub fn lowered(&self, identifier: &StringIdentifier) -> StringIdentifier {
299        let string = self.lookup(identifier);
300
301        self.intern(string.to_ascii_lowercase())
302    }
303
304    /// Looks up an interned string by its identifier.
305    ///
306    /// # Arguments
307    ///
308    /// * `identifier` - The identifier of the interned string to look up.
309    ///
310    /// # Panics
311    ///
312    /// This method will panic if it encounters an invalid identifier. This should never
313    /// occur unless there is an issue with the identifier or the interner is used
314    /// incorrectly.
315    #[inline]
316    pub fn lookup(&self, identifier: &StringIdentifier) -> &str {
317        if identifier.is_empty() {
318            return "";
319        }
320
321        self.rodeo.try_resolve(identifier).expect(
322            "invalid string identifier; this should never happen unless the identifier is \
323                corrupted or the interner is used incorrectly",
324        )
325    }
326
327    /// Returns all interned strings and their identifiers as a hashmap.
328    #[inline]
329    pub fn all(&self) -> HashSet<(StringIdentifier, &str)> {
330        self.rodeo.iter().collect()
331    }
332}
333
334impl std::fmt::Display for StringIdentifier {
335    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
336        write!(f, "string-identifier({})", self.0)
337    }
338}
339
340unsafe impl Send for ThreadedInterner {}
341unsafe impl Sync for ThreadedInterner {}
342
343impl std::default::Default for Interner {
344    #[inline]
345    fn default() -> Self {
346        Self::new()
347    }
348}
349
350impl std::default::Default for ThreadedInterner {
351    #[inline]
352    fn default() -> Self {
353        Self::new()
354    }
355}