any_intern/
lib.rs

1#![doc = include_str!("../README.md")]
2
3mod any;
4mod common;
5mod dropless;
6mod typed;
7
8// === Re-exports ===
9
10pub use any::{AnyArena, AnyInternSet, AnyInterner};
11pub use common::{Interned, RawInterned, UnsafeLock};
12pub use dropless::{Dropless, DroplessInternSet, DroplessInterner};
13pub use typed::TypedArena;
14
15use std::{
16    any::TypeId,
17    borrow,
18    collections::HashMap,
19    hash::{BuildHasher, Hash},
20};
21
22/// A generic interner for storing and deduplicating values of various types.
23///
24/// The `Interner` provides a mechanism to store values in a way that ensures each unique value is
25/// stored only once. It supports interning both static types and dropless types, allowing efficient
26/// memory usage and fast lookups.
27///
28/// Interning is useful when you need to store many instances of the same value and want to avoid
29/// duplication. Instead of storing multiple copies of the same value, the `Interner` ensures that
30/// only one instance of each unique value exists, and all references point to that instance.
31///
32/// # Examples
33///
34/// ```
35/// use any_intern::Interner;
36///
37/// #[derive(PartialEq, Eq, Hash, Debug)]
38/// struct A(u32);
39///
40/// #[derive(PartialEq, Eq, Hash, Debug)]
41/// struct B(String);
42///
43/// let interner = Interner::new();
44///
45/// // Interning integers
46/// let int1 = interner.intern_static(42_u32);
47/// let int2 = interner.intern_static(42_u32);
48/// assert_eq!(int1, int2); // Same value, same reference
49///
50/// // Interning custom structs
51/// let a1 = interner.intern_static(A(1));
52/// let a2 = interner.intern_static(A(1));
53/// assert_eq!(a1, a2); // Same value, same reference
54///
55/// // Interning strings
56/// let b1 = interner.intern_dropless(&*String::from("hello"));
57/// let b2 = interner.intern_dropless(&*String::from("hello"));
58/// assert_eq!(b1, b2); // Same value, same reference
59/// ```
60pub struct Interner<S = fxhash::FxBuildHasher> {
61    /// Intern storage for static types.
62    pub anys: UnsafeLock<HashMap<TypeId, AnyInternSet, S>>,
63
64    /// Intern storage for dropless types.
65    pub dropless: DroplessInterner,
66}
67
68impl Interner {
69    pub fn new() -> Self {
70        Self::default()
71    }
72}
73
74impl<S: BuildHasher> Interner<S> {
75    /// Stores a value in the interner, returning a reference to the interned value.
76    ///
77    /// This method inserts the given value into the interner if it does not already exist. If the
78    /// value already exists, a reference to the existing value is returned.
79    ///
80    /// # Examples
81    ///
82    /// ```
83    /// use any_intern::Interner;
84    ///
85    /// #[derive(PartialEq, Eq, Hash, Debug)]
86    /// struct A(u32);
87    ///
88    /// #[derive(PartialEq, Eq, Hash, Debug)]
89    /// struct B(String);
90    ///
91    /// let interner = Interner::new();
92    ///
93    /// // Interning integers
94    /// let int1 = interner.intern_static(42_u32);
95    /// let int2 = interner.intern_static(42_u32);
96    /// assert_eq!(int1, int2); // Same value, same reference
97    /// assert_eq!(int1.raw().as_ptr(), int2.raw().as_ptr());
98    ///
99    /// // Interning custom structs
100    /// let a1 = interner.intern_static(A(1));
101    /// let a2 = interner.intern_static(A(1));
102    /// assert_eq!(a1, a2); // Same value, same reference
103    /// assert_eq!(a1.raw().as_ptr(), a2.raw().as_ptr());
104    ///
105    /// // Interning strings
106    /// let b1 = interner.intern_static(B("hello".to_string()));
107    /// let b2 = interner.intern_static(B("hello".to_string()));
108    /// assert_eq!(b1, b2); // Same value, same reference
109    /// assert_eq!(b1.raw().as_ptr(), b2.raw().as_ptr());
110    ///
111    /// // Interning different values
112    /// let b3 = interner.intern_static(B("world".to_string()));
113    /// assert_ne!(b1, b3); // Different values, different references
114    /// ```
115    pub fn intern_static<K: Hash + Eq + 'static>(&self, value: K) -> Interned<'_, K> {
116        self.with_any_set::<K, _, _>(|set| unsafe {
117            // Safety: Type `K` is consistent and correct.
118            set.intern(value)
119        })
120    }
121
122    /// Stores a value in the interner, creating it only if it does not already exist.
123    ///
124    /// This method allows you to provide a key and a closure to generate the value. If the key
125    /// already exists in the interner, the closure is not called, and a reference to the existing
126    /// value is returned. If the key does not exist, the closure is called to create the value,
127    /// which is then stored in the interner.
128    ///
129    /// This method is useful when the value is expensive to compute, as it avoids unnecessary
130    /// computation if the value already exists.
131    ///
132    /// # Examples
133    ///
134    /// ```
135    /// use any_intern::Interner;
136    ///
137    /// #[derive(PartialEq, Eq, Hash, Debug)]
138    /// struct A(u32);
139    ///
140    /// impl std::borrow::Borrow<u32> for A {
141    ///     fn borrow(&self) -> &u32 {
142    ///         &self.0
143    ///     }
144    /// }
145    ///
146    /// let interner = Interner::new();
147    ///
148    /// let a = interner.intern_static_with(&42, || A(42));
149    /// assert_eq!(interner.len(), 1);
150    /// assert_eq!(*a, &A(42));
151    ///
152    /// let b = interner.intern_static_with(&42, || A(99)); // Closure is not called
153    /// assert_eq!(interner.len(), 1);
154    /// assert_eq!(*b, &A(42));
155    ///
156    /// let c = interner.intern_static_with(&43, || A(43));
157    /// assert_eq!(interner.len(), 2);
158    /// assert_eq!(*c, &A(43));
159    /// ```
160    pub fn intern_static_with<'a, K, Q, F>(&'a self, key: &Q, make_value: F) -> Interned<'a, K>
161    where
162        K: borrow::Borrow<Q> + 'static,
163        Q: Hash + Eq + ?Sized,
164        F: FnOnce() -> K,
165    {
166        self.with_any_set::<K, _, _>(|set| unsafe {
167            // Safety: Type `K` is consistent and correct.
168            set.intern_with(key, make_value)
169        })
170    }
171
172    /// Retrieves a reference to a value in the interner based on the provided key.
173    ///
174    /// This method checks if a value corresponding to the given key exists in the interner. If it
175    /// exists, a reference to the interned value is returned. Otherwise, `None` is returned.
176    ///
177    /// # Examples
178    ///
179    /// ```
180    /// use any_intern::Interner;
181    ///
182    /// let interner = Interner::new();
183    /// interner.intern_static(42_u32);
184    ///
185    /// assert_eq!(*interner.get::<u32, _>(&42_u32).unwrap(), &42);
186    /// assert!(interner.get::<u32, _>(&99_u32).is_none());
187    /// ```
188    pub fn get<K, Q>(&self, key: &Q) -> Option<Interned<'_, K>>
189    where
190        K: borrow::Borrow<Q> + 'static,
191        Q: Hash + Eq + ?Sized,
192    {
193        self.with_any_set::<K, _, _>(|set| unsafe {
194            // Safety: Type `K` is consistent and correct.
195            set.get(key)
196        })
197    }
198
199    /// Stores the given dropless value in the interner then returns reference to the value if the
200    /// interner doesn't contain the same value yet.
201    ///
202    /// If the same value exists in the interner, reference to the existing value is returned.
203    ///
204    /// This method does not take the value's ownership. Instead, it copies the value into the
205    /// interner's memory, then returns reference to that.
206    ///
207    /// # Eaxmples
208    ///
209    /// ```
210    /// use any_intern::Interner;
211    ///
212    /// let interner = Interner::new();
213    /// let a = interner.intern_dropless("hello");
214    /// let b = interner.intern_dropless(*Box::new("hello"));
215    /// let c = interner.intern_dropless("hi");
216    /// assert_eq!(a, b);
217    /// assert_ne!(a, c);
218    /// ```
219    pub fn intern_dropless<K: Dropless + ?Sized>(&self, value: &K) -> Interned<'_, K> {
220        self.dropless.intern(value)
221    }
222
223    /// Retrieves a reference to a value in the interner based on the provided key.
224    ///
225    /// This method checks if a value corresponding to the given key exists in the interner. If it
226    /// exists, a reference to the interned value is returned. Otherwise, `None` is returned.
227    ///
228    /// # Eaxmples
229    ///
230    /// ```
231    /// use any_intern::Interner;
232    ///
233    /// let interner = Interner::new();
234    /// interner.intern_dropless("hello");
235    ///
236    /// assert_eq!(interner.get_dropless("hello").as_deref(), Some(&"hello"));
237    /// assert!(interner.get_dropless("hi").is_none());
238    /// ```
239    pub fn get_dropless<K: Dropless + ?Sized>(&self, value: &K) -> Option<Interned<'_, K>> {
240        self.dropless.get(value)
241    }
242
243    /// Returns number of values the interner contains.
244    pub fn len(&self) -> usize {
245        self.with_any_sets(|sets| sets.values().map(AnyInternSet::len).sum::<usize>())
246            + self.dropless.len()
247    }
248
249    /// Returns true if the interner is empty.
250    pub fn is_empty(&self) -> bool {
251        self.len() == 0
252    }
253
254    /// Removes all values in the interner.
255    ///
256    /// Although the interner support interior mutability, clear method requires mutable access
257    /// to the interner to invalidate all [`Interned`]s referencing the interner.
258    pub fn clear(&mut self) {
259        self.with_any_sets(|sets| {
260            for set in sets.values_mut() {
261                set.clear();
262            }
263        });
264        self.dropless.clear();
265    }
266
267    /// * f - Its argument is guaranteed to be a set for the type `K`.
268    fn with_any_set<'this, K, F, R>(&'this self, f: F) -> R
269    where
270        K: 'static,
271        F: FnOnce(&'this mut AnyInternSet) -> R,
272        R: 'this,
273    {
274        self.with_any_sets(|sets| {
275            let set = sets
276                .entry(TypeId::of::<K>())
277                .or_insert_with(|| AnyInternSet::of::<K>());
278            f(set)
279        })
280    }
281
282    fn with_any_sets<'this, F, R>(&self, f: F) -> R
283    where
284        F: FnOnce(&'this mut HashMap<TypeId, AnyInternSet, S>) -> R,
285        R: 'this,
286        S: 'this,
287    {
288        // Safety: Mutex unlocking is paired with the locking.
289        unsafe {
290            let sets = self.anys.lock().as_mut();
291            let ret = f(sets);
292            self.anys.unlock();
293            ret
294        }
295    }
296}
297
298impl<S: Default> Default for Interner<S> {
299    fn default() -> Self {
300        // Safety: Only one instance
301        let anys = unsafe { UnsafeLock::new(HashMap::default()) };
302        Self {
303            anys,
304            dropless: DroplessInterner::default(),
305        }
306    }
307}
308
309#[cfg(test)]
310mod tests {
311    use super::*;
312    use crate::common::{self, RawInterned};
313
314    #[test]
315    #[rustfmt::skip]
316    fn test_interner_various_types() {
317        #[derive(PartialEq, Eq, Hash)] struct A(i32);
318        #[derive(PartialEq, Eq, Hash)] struct B(i32);
319
320        let interner = Interner::new();
321
322        let groups: [&[RawInterned]; _] = [
323            &[interner.intern_static(A(0)).erased_raw(), interner.intern_static(A(0)).erased_raw()],
324            &[interner.intern_static(A(1)).erased_raw()],
325            &[interner.intern_static(B(0)).erased_raw(), interner.intern_static(B(0)).erased_raw()],
326            &[interner.intern_static(B(1)).erased_raw()],
327        ];
328        common::assert_group_addr_eq(&groups);
329    }
330
331    // Address insdie Interned<'_, T> must be valid while the interner lives.
332    #[test]
333    fn test_fixed_memory_after_huge_number_of_interninig() {
334        const TEST_SIZE_IN_BYTES: isize = 1024 * 1024 * 5; // 5 MB
335
336        let interner = Interner::new();
337
338        let mut remain_bytes = TEST_SIZE_IN_BYTES;
339        let mut interned_usize = Vec::new();
340        for i in 0_usize.. {
341            if remain_bytes < 0 {
342                break;
343            }
344            let value = i;
345            remain_bytes -= size_of_val(&value) as isize;
346
347            let interned = interner.intern_static(value);
348            interned_usize.push(interned);
349        }
350
351        let mut remain_bytes = TEST_SIZE_IN_BYTES;
352        let mut interned_str = Vec::new();
353        for i in 0.. {
354            if remain_bytes < 0 {
355                break;
356            }
357            let value = i.to_string();
358            let value = value.as_str();
359            remain_bytes -= size_of_val(&value) as isize;
360
361            let interned = interner.intern_dropless(value);
362            interned_str.push(interned);
363        }
364
365        // Test will pass if the data have not moved.
366        for (i, interned) in interned_usize.into_iter().enumerate() {
367            assert_eq!(i, **interned)
368        }
369        for (i, interned) in interned_str.into_iter().enumerate() {
370            assert_eq!(&i.to_string(), *interned);
371        }
372    }
373}