Skip to main content

any_intern/
lib.rs

1#![doc = include_str!("../README.md")]
2
3mod any;
4mod common;
5mod dropless;
6mod typed;
7
8// === Re-exports ===
9
10pub use any::{AnyArena, AnyInternSet, AnyInterner};
11pub use common::{Interned, RawInterned, UnsafeLock};
12pub use dropless::{Dropless, DroplessInternSet, DroplessInterner};
13pub use typed::TypedArena;
14
15use std::{
16    any::TypeId,
17    borrow,
18    collections::HashMap,
19    fmt::{self, Display},
20    hash::{BuildHasher, Hash},
21};
22
23/// A generic interner for storing and deduplicating values of various types.
24///
25/// The `Interner` provides a mechanism to store values in a way that ensures each unique value is
26/// stored only once. It supports interning both static types and dropless types, allowing efficient
27/// memory usage and fast lookups.
28///
29/// Interning is useful when you need to store many instances of the same value and want to avoid
30/// duplication. Instead of storing multiple copies of the same value, the `Interner` ensures that
31/// only one instance of each unique value exists, and all references point to that instance.
32///
33/// # Examples
34///
35/// ```
36/// use any_intern::Interner;
37///
38/// #[derive(PartialEq, Eq, Hash, Debug)]
39/// struct A(u32);
40///
41/// #[derive(PartialEq, Eq, Hash, Debug)]
42/// struct B(String);
43///
44/// let interner = Interner::new();
45///
46/// // Interning integers
47/// let int1 = interner.intern_static(42_u32);
48/// let int2 = interner.intern_static(42_u32);
49/// assert_eq!(int1, int2); // Same value, same reference
50///
51/// // Interning custom structs
52/// let a1 = interner.intern_static(A(1));
53/// let a2 = interner.intern_static(A(1));
54/// assert_eq!(a1, a2); // Same value, same reference
55///
56/// // Interning strings
57/// let b1 = interner.intern_dropless(&*String::from("hello"));
58/// let b2 = interner.intern_dropless(&*String::from("hello"));
59/// assert_eq!(b1, b2); // Same value, same reference
60/// ```
61pub struct Interner<S = fxhash::FxBuildHasher> {
62    /// Intern storage for static types.
63    pub anys: UnsafeLock<HashMap<TypeId, AnyInternSet, S>>,
64
65    /// Intern storage for dropless types.
66    pub dropless: DroplessInterner,
67}
68
69impl Interner {
70    pub fn new() -> Self {
71        Self::default()
72    }
73}
74
75impl<S: BuildHasher> Interner<S> {
76    /// Stores a value in the interner, returning a reference to the interned value.
77    ///
78    /// This method inserts the given value into the interner if it does not already exist. If the
79    /// value already exists, a reference to the existing value is returned.
80    ///
81    /// # Examples
82    ///
83    /// ```
84    /// use any_intern::Interner;
85    ///
86    /// #[derive(PartialEq, Eq, Hash, Debug)]
87    /// struct A(u32);
88    ///
89    /// #[derive(PartialEq, Eq, Hash, Debug)]
90    /// struct B(String);
91    ///
92    /// let interner = Interner::new();
93    ///
94    /// // Interning integers
95    /// let int1 = interner.intern_static(42_u32);
96    /// let int2 = interner.intern_static(42_u32);
97    /// assert_eq!(int1, int2); // Same value, same reference
98    /// assert_eq!(int1.raw().as_ptr(), int2.raw().as_ptr());
99    ///
100    /// // Interning custom structs
101    /// let a1 = interner.intern_static(A(1));
102    /// let a2 = interner.intern_static(A(1));
103    /// assert_eq!(a1, a2); // Same value, same reference
104    /// assert_eq!(a1.raw().as_ptr(), a2.raw().as_ptr());
105    ///
106    /// // Interning strings
107    /// let b1 = interner.intern_static(B("hello".to_string()));
108    /// let b2 = interner.intern_static(B("hello".to_string()));
109    /// assert_eq!(b1, b2); // Same value, same reference
110    /// assert_eq!(b1.raw().as_ptr(), b2.raw().as_ptr());
111    ///
112    /// // Interning different values
113    /// let b3 = interner.intern_static(B("world".to_string()));
114    /// assert_ne!(b1, b3); // Different values, different references
115    /// ```
116    pub fn intern_static<K: Hash + Eq + 'static>(&self, value: K) -> Interned<'_, K> {
117        self.with_any_set::<K, _, _>(|set| unsafe {
118            // Safety: Type `K` is consistent and correct.
119            set.intern(value)
120        })
121    }
122
123    /// Stores a value in the interner, creating it only if it does not already exist.
124    ///
125    /// This method allows you to provide a key and a closure to generate the value. If the key
126    /// already exists in the interner, the closure is not called, and a reference to the existing
127    /// value is returned. If the key does not exist, the closure is called to create the value,
128    /// which is then stored in the interner.
129    ///
130    /// This method is useful when the value is expensive to compute, as it avoids unnecessary
131    /// computation if the value already exists.
132    ///
133    /// # Examples
134    ///
135    /// ```
136    /// use any_intern::Interner;
137    ///
138    /// #[derive(PartialEq, Eq, Hash, Debug)]
139    /// struct A(u32);
140    ///
141    /// impl std::borrow::Borrow<u32> for A {
142    ///     fn borrow(&self) -> &u32 {
143    ///         &self.0
144    ///     }
145    /// }
146    ///
147    /// let interner = Interner::new();
148    ///
149    /// let a = interner.intern_static_with(&42, || A(42));
150    /// assert_eq!(interner.len(), 1);
151    /// assert_eq!(*a, A(42));
152    ///
153    /// let b = interner.intern_static_with(&42, || A(99)); // Closure is not called
154    /// assert_eq!(interner.len(), 1);
155    /// assert_eq!(*b, A(42));
156    ///
157    /// let c = interner.intern_static_with(&43, || A(43));
158    /// assert_eq!(interner.len(), 2);
159    /// assert_eq!(*c, A(43));
160    /// ```
161    pub fn intern_static_with<'a, K, Q, F>(&'a self, key: &Q, make_value: F) -> Interned<'a, K>
162    where
163        K: borrow::Borrow<Q> + 'static,
164        Q: Hash + Eq + ?Sized,
165        F: FnOnce() -> K,
166    {
167        self.with_any_set::<K, _, _>(|set| unsafe {
168            // Safety: Type `K` is consistent and correct.
169            set.intern_with(key, make_value)
170        })
171    }
172
173    /// Retrieves a reference to a value in the interner based on the provided key.
174    ///
175    /// This method checks if a value corresponding to the given key exists in the interner. If it
176    /// exists, a reference to the interned value is returned. Otherwise, `None` is returned.
177    ///
178    /// # Examples
179    ///
180    /// ```
181    /// use any_intern::Interner;
182    ///
183    /// let interner = Interner::new();
184    /// interner.intern_static(42_u32);
185    ///
186    /// assert_eq!(*interner.get::<u32, _>(&42_u32).unwrap(), 42);
187    /// assert!(interner.get::<u32, _>(&99_u32).is_none());
188    /// ```
189    pub fn get<K, Q>(&self, key: &Q) -> Option<Interned<'_, K>>
190    where
191        K: borrow::Borrow<Q> + 'static,
192        Q: Hash + Eq + ?Sized,
193    {
194        self.with_any_set::<K, _, _>(|set| unsafe {
195            // Safety: Type `K` is consistent and correct.
196            set.get(key)
197        })
198    }
199
200    /// Stores the given dropless value in the interner then returns reference to the value if the
201    /// interner doesn't contain the same value yet.
202    ///
203    /// If the same value exists in the interner, reference to the existing value is returned.
204    ///
205    /// This method does not take the value's ownership. Instead, it copies the value into the
206    /// interner's memory, then returns reference to that.
207    ///
208    /// # Eaxmples
209    ///
210    /// ```
211    /// use any_intern::Interner;
212    ///
213    /// let interner = Interner::new();
214    /// let a = interner.intern_dropless("hello");
215    /// let b = interner.intern_dropless(*Box::new("hello"));
216    /// let c = interner.intern_dropless("hi");
217    /// assert_eq!(a, b);
218    /// assert_ne!(a, c);
219    /// ```
220    pub fn intern_dropless<K: Dropless + ?Sized>(&self, value: &K) -> Interned<'_, K> {
221        self.dropless.intern(value)
222    }
223
224    /// Stores a value in the interner as a formatted string through [`Display`] then returns
225    /// reference to the value if the interner doesn't contain the formatted string yet.
226    ///
227    /// If the same string exists in the interner, reference to the existing string is returned.
228    ///
229    /// This method provides a buffer for making string. This will be benefit in terms of
230    /// performance when you frequently make `String` via something like `to_string()` by exploiting
231    /// chunk memory.
232    ///
233    /// If you give insufficient `upper_size`, then error is returned.
234    ///
235    /// # Examples
236    ///
237    /// ```
238    /// use any_intern::Interner;
239    ///
240    /// let interner = Interner::new();
241    /// let value = 42;
242    /// let interned = interner.intern_formatted_str(&value, 10).unwrap();
243    ///
244    /// assert_eq!(&*interned, "42");
245    /// ```
246    pub fn intern_formatted_str<K: Display + ?Sized>(
247        &self,
248        value: &K,
249        upper_size: usize,
250    ) -> Result<Interned<'_, str>, fmt::Error> {
251        self.dropless.intern_formatted_str(value, upper_size)
252    }
253
254    /// Retrieves a reference to a value in the interner based on the provided key.
255    ///
256    /// This method checks if a value corresponding to the given key exists in the interner. If it
257    /// exists, a reference to the interned value is returned. Otherwise, `None` is returned.
258    ///
259    /// # Eaxmples
260    ///
261    /// ```
262    /// use any_intern::Interner;
263    ///
264    /// let interner = Interner::new();
265    /// interner.intern_dropless("hello");
266    ///
267    /// assert_eq!(interner.get_dropless("hello").as_deref(), Some("hello"));
268    /// assert!(interner.get_dropless("hi").is_none());
269    /// ```
270    pub fn get_dropless<K: Dropless + ?Sized>(&self, value: &K) -> Option<Interned<'_, K>> {
271        self.dropless.get(value)
272    }
273
274    /// Returns number of values the interner contains.
275    pub fn len(&self) -> usize {
276        self.with_any_sets(|sets| sets.values().map(AnyInternSet::len).sum::<usize>())
277            + self.dropless.len()
278    }
279
280    /// Returns true if the interner is empty.
281    pub fn is_empty(&self) -> bool {
282        self.len() == 0
283    }
284
285    /// Removes all values in the interner.
286    ///
287    /// Although the interner support interior mutability, clear method requires mutable access
288    /// to the interner to invalidate all [`Interned`]s referencing the interner.
289    pub fn clear(&mut self) {
290        self.with_any_sets(|sets| {
291            for set in sets.values_mut() {
292                set.clear();
293            }
294        });
295        self.dropless.clear();
296    }
297
298    /// * f - Its argument is guaranteed to be a set for the type `K`.
299    fn with_any_set<'this, K, F, R>(&'this self, f: F) -> R
300    where
301        K: 'static,
302        F: FnOnce(&'this mut AnyInternSet) -> R,
303        R: 'this,
304    {
305        self.with_any_sets(|sets| {
306            let set = sets
307                .entry(TypeId::of::<K>())
308                .or_insert_with(|| AnyInternSet::of::<K>());
309            f(set)
310        })
311    }
312
313    fn with_any_sets<'this, F, R>(&self, f: F) -> R
314    where
315        F: FnOnce(&'this mut HashMap<TypeId, AnyInternSet, S>) -> R,
316        R: 'this,
317        S: 'this,
318    {
319        // Safety: Mutex unlocking is paired with the locking.
320        unsafe {
321            let sets = self.anys.lock().as_mut();
322            let ret = f(sets);
323            self.anys.unlock();
324            ret
325        }
326    }
327}
328
329impl<S: Default> Default for Interner<S> {
330    fn default() -> Self {
331        // Safety: Only one instance
332        let anys = unsafe { UnsafeLock::new(HashMap::default()) };
333        Self {
334            anys,
335            dropless: DroplessInterner::default(),
336        }
337    }
338}
339
340#[cfg(test)]
341mod tests {
342    use super::*;
343    use crate::common::{self, RawInterned};
344
345    #[test]
346    #[rustfmt::skip]
347    fn test_interner_various_types() {
348        #[derive(PartialEq, Eq, Hash)] struct A(i32);
349        #[derive(PartialEq, Eq, Hash)] struct B(i32);
350
351        let interner = Interner::new();
352
353        let groups: [&[RawInterned]; _] = [
354            &[interner.intern_static(A(0)).erased_raw(), interner.intern_static(A(0)).erased_raw()],
355            &[interner.intern_static(A(1)).erased_raw()],
356            &[interner.intern_static(B(0)).erased_raw(), interner.intern_static(B(0)).erased_raw()],
357            &[interner.intern_static(B(1)).erased_raw()],
358        ];
359        common::assert_group_addr_eq(&groups);
360    }
361
362    // Address insdie Interned<'_, T> must be valid while the interner lives.
363    #[test]
364    fn test_fixed_memory_after_huge_number_of_interninig() {
365        const TEST_SIZE_IN_BYTES: isize = 1024 * 1024 * 5; // 5 MB
366
367        let interner = Interner::new();
368
369        let mut remain_bytes = TEST_SIZE_IN_BYTES;
370        let mut interned_usize = Vec::new();
371        for i in 0_usize.. {
372            if remain_bytes < 0 {
373                break;
374            }
375            let value = i;
376            remain_bytes -= size_of_val(&value) as isize;
377
378            let interned = interner.intern_static(value);
379            interned_usize.push(interned);
380        }
381
382        let mut remain_bytes = TEST_SIZE_IN_BYTES;
383        let mut interned_str = Vec::new();
384        for i in 0.. {
385            if remain_bytes < 0 {
386                break;
387            }
388            let value = i.to_string();
389            let value = value.as_str();
390            remain_bytes -= size_of_val(&value) as isize;
391
392            let interned = interner.intern_dropless(value);
393            interned_str.push(interned);
394        }
395
396        // Test will pass if the data have not moved.
397        for (i, interned) in interned_usize.into_iter().enumerate() {
398            assert_eq!(i, *interned)
399        }
400        for (i, interned) in interned_str.into_iter().enumerate() {
401            assert_eq!(*i.to_string(), *interned);
402        }
403    }
404}