mago_interner/
lib.rs

1use std::collections::HashSet;
2use std::num::NonZeroUsize;
3use std::sync::Arc;
4
5use lasso::Key;
6use lasso::Rodeo;
7use lasso::ThreadedRodeo;
8use serde::Deserialize;
9use serde::Serialize;
10
/// A compact, copyable identifier that represents an interned string.
///
/// The wrapped value `0` is reserved for the empty string (see
/// [`StringIdentifier::empty`]); identifiers built through [`StringIdentifier::new`]
/// always wrap a non-zero value.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash, Serialize, Deserialize, PartialOrd, Ord)]
#[repr(transparent)]
pub struct StringIdentifier(pub(crate) usize);
15
16impl StringIdentifier {
17    /// Creates a new empty `StringIdentifier`.
18    #[inline(always)]
19    pub const fn empty() -> Self {
20        Self(0)
21    }
22
23    /// Creates a new `StringIdentifier`.
24    ///
25    /// # Arguments
26    ///
27    /// * `val` - The value of the string identifier.
28    #[inline(always)]
29    pub const fn new(val: NonZeroUsize) -> Self {
30        Self(val.get())
31    }
32
33    /// Returns `true` if the string is empty.
34    #[inline(always)]
35    pub const fn is_empty(&self) -> bool {
36        self.0 == 0
37    }
38
39    /// Returns the value of the string identifier.
40    #[inline(always)]
41    pub const fn value(&self) -> usize {
42        self.0
43    }
44
45    /// Returns `true` if the string identifier is the same as the other.
46    #[inline(always)]
47    pub const fn is_same_as(&self, other: &Self) -> bool {
48        self.0 == other.0
49    }
50}
51
52unsafe impl Key for StringIdentifier {
53    #[inline(always)]
54    fn into_usize(self) -> usize {
55        self.0 - 1
56    }
57
58    #[inline(always)]
59    fn try_from_usize(int: usize) -> Option<Self> {
60        Some(Self::new(NonZeroUsize::new(int + 1)?))
61    }
62}
63
/// A string interner that stores strings and assigns them unique identifiers.
#[derive(Debug)]
pub struct Interner {
    // Backing store for every non-empty interned string. The empty string is never stored
    // here: the methods on `Interner` map it to `StringIdentifier::empty()` directly.
    rodeo: Rodeo<StringIdentifier>,
}
68
69/// A string interner that stores strings and assigns them unique identifiers.
70impl Interner {
71    /// Creates a new `Interner`.
72    pub fn new() -> Self {
73        Self { rodeo: Rodeo::new() }
74    }
75
76    /// Returns the number of strings stored in the interner.
77    #[inline]
78    pub fn len(&self) -> usize {
79        self.rodeo.len()
80    }
81
82    /// Returns `true` if the interner is empty.
83    #[inline]
84    pub fn is_empty(&self) -> bool {
85        self.rodeo.is_empty()
86    }
87
88    /// Returns the identifier for the specified interned string.
89    ///
90    /// # Arguments
91    ///
92    /// * string - The interned string.
93    #[inline]
94    pub fn get(&self, string: impl AsRef<str>) -> Option<StringIdentifier> {
95        let str = string.as_ref();
96        if str.is_empty() {
97            return Some(StringIdentifier::empty());
98        }
99
100        self.rodeo.get(str)
101    }
102
103    /// Interns the specified string, returning the identifier for it.
104    ///
105    /// If the string is already interned, the existing identifier is returned.
106    ///
107    /// # Arguments
108    ///
109    /// * string - The string to intern.
110    #[inline]
111    pub fn intern(&mut self, string: impl AsRef<str>) -> StringIdentifier {
112        let str = string.as_ref();
113        if str.is_empty() {
114            return StringIdentifier::empty();
115        }
116
117        self.rodeo.get_or_intern(str)
118    }
119
120    /// Interns a string if it has not already been interned, then returns a reference
121    /// to the interned string.
122    ///
123    /// # Arguments
124    ///
125    /// * `string` - A string or any type that implements `AsRef<str>`, representing the
126    ///   string to intern.
127    ///
128    /// # Returns
129    ///
130    /// A reference to the interned version of the string.
131    ///
132    /// # Panics
133    ///
134    /// This method will panic if it encounters an invalid identifier. This should never
135    /// occur unless there is an issue with the identifier or the interner is used
136    /// incorrectly.
137    #[inline]
138    pub fn interned_str(&mut self, string: impl AsRef<str>) -> &str {
139        let str = string.as_ref();
140        if str.is_empty() {
141            return "";
142        }
143
144        let identifier = self.rodeo.get_or_intern(str);
145
146        self.rodeo.try_resolve(&identifier).expect(
147            "invalid string identifier; this should never happen unless the identifier is \
148                corrupted or the interner is used incorrectly",
149        )
150    }
151
152    /// Given an identifier, returns the identifier for the same string but with all
153    /// characters in lowercase.
154    ///
155    /// # Arguments
156    ///
157    /// * `identifier` - The identifier of the string to lower.
158    ///
159    /// # Returns
160    ///
161    /// The identifier of the string with all characters in lowercase.
162    #[inline]
163    pub fn lowered(&mut self, identifier: &StringIdentifier) -> StringIdentifier {
164        let string = self.lookup(identifier);
165
166        self.intern(string.to_ascii_lowercase())
167    }
168
169    /// Returns the interned string for the specified identifier.
170    ///
171    /// # Arguments
172    ///
173    /// * identifier - The identifier to look up.
174    ///
175    /// # Panics
176    ///
177    /// Panics if the identifier is invalid
178    #[inline]
179    pub fn lookup(&self, identifier: &StringIdentifier) -> &str {
180        if identifier.is_empty() {
181            return "";
182        }
183
184        self.rodeo.try_resolve(identifier).expect(
185            "invalid string identifier; this should never happen unless the identifier is \
186                corrupted or the interner is used incorrectly",
187        )
188    }
189}
190
/// A thread-safe interner, allowing multiple threads to concurrently intern strings.
///
/// Cloning is cheap: every clone holds another `Arc` handle to the same underlying
/// `ThreadedRodeo`, so all clones observe the same interned strings.
#[derive(Debug, Clone)]
pub struct ThreadedInterner {
    // Shared backing store for every non-empty interned string. The empty string is never
    // stored here: the methods on `ThreadedInterner` special-case it.
    rodeo: Arc<ThreadedRodeo<StringIdentifier>>,
}
196
197impl ThreadedInterner {
198    /// Creates a new `ThreadedInterner`.
199    #[inline]
200    pub fn new() -> Self {
201        Self { rodeo: Arc::new(ThreadedRodeo::new()) }
202    }
203
204    /// Returns the number of strings stored in the interner.
205    #[inline]
206    pub fn len(&self) -> usize {
207        self.rodeo.len()
208    }
209
210    /// Returns `true` if the interner is empty.
211    #[inline]
212    pub fn is_empty(&self) -> bool {
213        self.rodeo.is_empty()
214    }
215
216    /// Interns a string and returns its identifier.
217    ///
218    /// If the string is already interned, the existing identifier is returned.
219    ///
220    /// # Arguments
221    ///
222    /// * `string` - The string to intern.
223    #[inline]
224    pub fn intern(&self, string: impl AsRef<str>) -> StringIdentifier {
225        let str = string.as_ref();
226        if str.is_empty() {
227            return StringIdentifier::empty();
228        }
229
230        self.rodeo.get_or_intern(str)
231    }
232
233    /// Interns a string if it has not already been interned, then returns a reference
234    /// to the interned string.
235    ///
236    /// # Arguments
237    ///
238    /// * `string` - A string or any type that implements `AsRef<str>`, representing the
239    ///   string to intern.
240    ///
241    /// # Returns
242    ///
243    /// A reference to the interned version of the string.
244    ///
245    /// # Panics
246    ///
247    /// This method will panic if it encounters an invalid identifier. This should never
248    /// occur unless there is an issue with the identifier or the interner is used
249    /// incorrectly.
250    #[inline]
251    pub fn interned_str(&self, string: impl AsRef<str>) -> &str {
252        let str = string.as_ref();
253        if str.is_empty() {
254            return "";
255        }
256
257        let identifier = self.rodeo.get_or_intern(str);
258
259        self.rodeo.try_resolve(&identifier).expect(
260            "invalid string identifier; this should never happen unless the identifier is \
261                corrupted or the interner is used incorrectly",
262        )
263    }
264
265    /// Given an identifier, returns the identifier for the same string but with all
266    /// characters in lowercase.
267    ///
268    /// # Arguments
269    ///
270    /// * `identifier` - The identifier of the string to lower.
271    ///
272    /// # Returns
273    ///
274    /// The identifier of the string with all characters in lowercase.
275    #[inline]
276    pub fn lowered(&self, identifier: &StringIdentifier) -> StringIdentifier {
277        let string = self.lookup(identifier);
278
279        self.intern(string.to_ascii_lowercase())
280    }
281
282    /// Looks up an interned string by its identifier.
283    ///
284    /// # Arguments
285    ///
286    /// * `identifier` - The identifier of the interned string to look up.
287    ///
288    /// # Panics
289    ///
290    /// This method will panic if it encounters an invalid identifier. This should never
291    /// occur unless there is an issue with the identifier or the interner is used
292    /// incorrectly.
293    #[inline]
294    pub fn lookup(&self, identifier: &StringIdentifier) -> &str {
295        if identifier.is_empty() {
296            return "";
297        }
298
299        self.rodeo.try_resolve(identifier).expect(
300            "invalid string identifier; this should never happen unless the identifier is \
301                corrupted or the interner is used incorrectly",
302        )
303    }
304
305    /// Returns all interned strings and their identifiers as a hashmap.
306    #[inline]
307    pub fn all(&self) -> HashSet<(StringIdentifier, &str)> {
308        self.rodeo.iter().collect()
309    }
310}
311
312impl std::fmt::Display for StringIdentifier {
313    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
314        write!(f, "string-identifier({})", self.0)
315    }
316}
317
// SAFETY(review): `ThreadedInterner`'s only field is an `Arc<ThreadedRodeo<StringIdentifier>>`,
// so these impls assert that the shared rodeo may be sent to and accessed from other threads.
// Presumably `ThreadedRodeo` is already `Send + Sync` on its own, which would make these
// manual impls redundant — TODO confirm against the lasso documentation and consider
// removing them so the compiler derives the auto traits instead.
unsafe impl Send for ThreadedInterner {}
unsafe impl Sync for ThreadedInterner {}
320
321impl std::default::Default for Interner {
322    #[inline]
323    fn default() -> Self {
324        Self::new()
325    }
326}
327
328impl std::default::Default for ThreadedInterner {
329    #[inline]
330    fn default() -> Self {
331        Self::new()
332    }
333}