any_intern/lib.rs
1#![doc = include_str!("../README.md")]
2
3mod any;
4mod common;
5mod dropless;
6mod typed;
7
8// === Re-exports ===
9
10pub use any::{AnyArena, AnyInternSet, AnyInterner};
11pub use common::{Interned, RawInterned, UnsafeLock};
12pub use dropless::{Dropless, DroplessInternSet, DroplessInterner};
13pub use typed::TypedArena;
14
15use std::{
16 any::TypeId,
17 borrow,
18 collections::HashMap,
19 hash::{BuildHasher, Hash},
20};
21
22/// A generic interner for storing and deduplicating values of various types.
23///
24/// The `Interner` provides a mechanism to store values in a way that ensures each unique value is
25/// stored only once. It supports interning both static types and dropless types, allowing efficient
26/// memory usage and fast lookups.
27///
28/// Interning is useful when you need to store many instances of the same value and want to avoid
29/// duplication. Instead of storing multiple copies of the same value, the `Interner` ensures that
30/// only one instance of each unique value exists, and all references point to that instance.
31///
32/// # Examples
33///
34/// ```
35/// use any_intern::Interner;
36///
37/// #[derive(PartialEq, Eq, Hash, Debug)]
38/// struct A(u32);
39///
40/// #[derive(PartialEq, Eq, Hash, Debug)]
41/// struct B(String);
42///
43/// let interner = Interner::new();
44///
45/// // Interning integers
46/// let int1 = interner.intern_static(42_u32);
47/// let int2 = interner.intern_static(42_u32);
48/// assert_eq!(int1, int2); // Same value, same reference
49///
50/// // Interning custom structs
51/// let a1 = interner.intern_static(A(1));
52/// let a2 = interner.intern_static(A(1));
53/// assert_eq!(a1, a2); // Same value, same reference
54///
55/// // Interning strings
56/// let b1 = interner.intern_dropless(&*String::from("hello"));
57/// let b2 = interner.intern_dropless(&*String::from("hello"));
58/// assert_eq!(b1, b2); // Same value, same reference
59/// ```
60pub struct Interner<S = fxhash::FxBuildHasher> {
61 /// Intern storage for static types.
62 pub anys: UnsafeLock<HashMap<TypeId, AnyInternSet, S>>,
63
64 /// Intern storage for dropless types.
65 pub dropless: DroplessInterner,
66}
67
68impl Interner {
69 pub fn new() -> Self {
70 Self::default()
71 }
72}
73
74impl<S: BuildHasher> Interner<S> {
75 /// Stores a value in the interner, returning a reference to the interned value.
76 ///
77 /// This method inserts the given value into the interner if it does not already exist. If the
78 /// value already exists, a reference to the existing value is returned.
79 ///
80 /// # Examples
81 ///
82 /// ```
83 /// use any_intern::Interner;
84 ///
85 /// #[derive(PartialEq, Eq, Hash, Debug)]
86 /// struct A(u32);
87 ///
88 /// #[derive(PartialEq, Eq, Hash, Debug)]
89 /// struct B(String);
90 ///
91 /// let interner = Interner::new();
92 ///
93 /// // Interning integers
94 /// let int1 = interner.intern_static(42_u32);
95 /// let int2 = interner.intern_static(42_u32);
96 /// assert_eq!(int1, int2); // Same value, same reference
97 /// assert_eq!(int1.raw().as_ptr(), int2.raw().as_ptr());
98 ///
99 /// // Interning custom structs
100 /// let a1 = interner.intern_static(A(1));
101 /// let a2 = interner.intern_static(A(1));
102 /// assert_eq!(a1, a2); // Same value, same reference
103 /// assert_eq!(a1.raw().as_ptr(), a2.raw().as_ptr());
104 ///
105 /// // Interning strings
106 /// let b1 = interner.intern_static(B("hello".to_string()));
107 /// let b2 = interner.intern_static(B("hello".to_string()));
108 /// assert_eq!(b1, b2); // Same value, same reference
109 /// assert_eq!(b1.raw().as_ptr(), b2.raw().as_ptr());
110 ///
111 /// // Interning different values
112 /// let b3 = interner.intern_static(B("world".to_string()));
113 /// assert_ne!(b1, b3); // Different values, different references
114 /// ```
115 pub fn intern_static<K: Hash + Eq + 'static>(&self, value: K) -> Interned<'_, K> {
116 self.with_any_set::<K, _, _>(|set| unsafe {
117 // Safety: Type `K` is consistent and correct.
118 set.intern(value)
119 })
120 }
121
122 /// Stores a value in the interner, creating it only if it does not already exist.
123 ///
124 /// This method allows you to provide a key and a closure to generate the value. If the key
125 /// already exists in the interner, the closure is not called, and a reference to the existing
126 /// value is returned. If the key does not exist, the closure is called to create the value,
127 /// which is then stored in the interner.
128 ///
129 /// This method is useful when the value is expensive to compute, as it avoids unnecessary
130 /// computation if the value already exists.
131 ///
132 /// # Examples
133 ///
134 /// ```
135 /// use any_intern::Interner;
136 ///
137 /// #[derive(PartialEq, Eq, Hash, Debug)]
138 /// struct A(u32);
139 ///
140 /// impl std::borrow::Borrow<u32> for A {
141 /// fn borrow(&self) -> &u32 {
142 /// &self.0
143 /// }
144 /// }
145 ///
146 /// let interner = Interner::new();
147 ///
148 /// let a = interner.intern_static_with(&42, || A(42));
149 /// assert_eq!(interner.len(), 1);
150 /// assert_eq!(*a, &A(42));
151 ///
152 /// let b = interner.intern_static_with(&42, || A(99)); // Closure is not called
153 /// assert_eq!(interner.len(), 1);
154 /// assert_eq!(*b, &A(42));
155 ///
156 /// let c = interner.intern_static_with(&43, || A(43));
157 /// assert_eq!(interner.len(), 2);
158 /// assert_eq!(*c, &A(43));
159 /// ```
160 pub fn intern_static_with<'a, K, Q, F>(&'a self, key: &Q, make_value: F) -> Interned<'a, K>
161 where
162 K: borrow::Borrow<Q> + 'static,
163 Q: Hash + Eq + ?Sized,
164 F: FnOnce() -> K,
165 {
166 self.with_any_set::<K, _, _>(|set| unsafe {
167 // Safety: Type `K` is consistent and correct.
168 set.intern_with(key, make_value)
169 })
170 }
171
172 /// Retrieves a reference to a value in the interner based on the provided key.
173 ///
174 /// This method checks if a value corresponding to the given key exists in the interner. If it
175 /// exists, a reference to the interned value is returned. Otherwise, `None` is returned.
176 ///
177 /// # Examples
178 ///
179 /// ```
180 /// use any_intern::Interner;
181 ///
182 /// let interner = Interner::new();
183 /// interner.intern_static(42_u32);
184 ///
185 /// assert_eq!(*interner.get::<u32, _>(&42_u32).unwrap(), &42);
186 /// assert!(interner.get::<u32, _>(&99_u32).is_none());
187 /// ```
188 pub fn get<K, Q>(&self, key: &Q) -> Option<Interned<'_, K>>
189 where
190 K: borrow::Borrow<Q> + 'static,
191 Q: Hash + Eq + ?Sized,
192 {
193 self.with_any_set::<K, _, _>(|set| unsafe {
194 // Safety: Type `K` is consistent and correct.
195 set.get(key)
196 })
197 }
198
199 /// Stores the given dropless value in the interner then returns reference to the value if the
200 /// interner doesn't contain the same value yet.
201 ///
202 /// If the same value exists in the interner, reference to the existing value is returned.
203 ///
204 /// This method does not take the value's ownership. Instead, it copies the value into the
205 /// interner's memory, then returns reference to that.
206 ///
207 /// # Eaxmples
208 ///
209 /// ```
210 /// use any_intern::Interner;
211 ///
212 /// let interner = Interner::new();
213 /// let a = interner.intern_dropless("hello");
214 /// let b = interner.intern_dropless(*Box::new("hello"));
215 /// let c = interner.intern_dropless("hi");
216 /// assert_eq!(a, b);
217 /// assert_ne!(a, c);
218 /// ```
219 pub fn intern_dropless<K: Dropless + ?Sized>(&self, value: &K) -> Interned<'_, K> {
220 self.dropless.intern(value)
221 }
222
223 /// Retrieves a reference to a value in the interner based on the provided key.
224 ///
225 /// This method checks if a value corresponding to the given key exists in the interner. If it
226 /// exists, a reference to the interned value is returned. Otherwise, `None` is returned.
227 ///
228 /// # Eaxmples
229 ///
230 /// ```
231 /// use any_intern::Interner;
232 ///
233 /// let interner = Interner::new();
234 /// interner.intern_dropless("hello");
235 ///
236 /// assert_eq!(interner.get_dropless("hello").as_deref(), Some(&"hello"));
237 /// assert!(interner.get_dropless("hi").is_none());
238 /// ```
239 pub fn get_dropless<K: Dropless + ?Sized>(&self, value: &K) -> Option<Interned<'_, K>> {
240 self.dropless.get(value)
241 }
242
243 /// Returns number of values the interner contains.
244 pub fn len(&self) -> usize {
245 self.with_any_sets(|sets| sets.values().map(AnyInternSet::len).sum::<usize>())
246 + self.dropless.len()
247 }
248
249 /// Returns true if the interner is empty.
250 pub fn is_empty(&self) -> bool {
251 self.len() == 0
252 }
253
254 /// Removes all values in the interner.
255 ///
256 /// Although the interner support interior mutability, clear method requires mutable access
257 /// to the interner to invalidate all [`Interned`]s referencing the interner.
258 pub fn clear(&mut self) {
259 self.with_any_sets(|sets| {
260 for set in sets.values_mut() {
261 set.clear();
262 }
263 });
264 self.dropless.clear();
265 }
266
267 /// * f - Its argument is guaranteed to be a set for the type `K`.
268 fn with_any_set<'this, K, F, R>(&'this self, f: F) -> R
269 where
270 K: 'static,
271 F: FnOnce(&'this mut AnyInternSet) -> R,
272 R: 'this,
273 {
274 self.with_any_sets(|sets| {
275 let set = sets
276 .entry(TypeId::of::<K>())
277 .or_insert_with(|| AnyInternSet::of::<K>());
278 f(set)
279 })
280 }
281
282 fn with_any_sets<'this, F, R>(&self, f: F) -> R
283 where
284 F: FnOnce(&'this mut HashMap<TypeId, AnyInternSet, S>) -> R,
285 R: 'this,
286 S: 'this,
287 {
288 // Safety: Mutex unlocking is paired with the locking.
289 unsafe {
290 let sets = self.anys.lock().as_mut();
291 let ret = f(sets);
292 self.anys.unlock();
293 ret
294 }
295 }
296}
297
298impl<S: Default> Default for Interner<S> {
299 fn default() -> Self {
300 // Safety: Only one instance
301 let anys = unsafe { UnsafeLock::new(HashMap::default()) };
302 Self {
303 anys,
304 dropless: DroplessInterner::default(),
305 }
306 }
307}
308
309#[cfg(test)]
310mod tests {
311 use super::*;
312 use crate::common::{self, RawInterned};
313
314 #[test]
315 #[rustfmt::skip]
316 fn test_interner_various_types() {
317 #[derive(PartialEq, Eq, Hash)] struct A(i32);
318 #[derive(PartialEq, Eq, Hash)] struct B(i32);
319
320 let interner = Interner::new();
321
322 let groups: [&[RawInterned]; _] = [
323 &[interner.intern_static(A(0)).erased_raw(), interner.intern_static(A(0)).erased_raw()],
324 &[interner.intern_static(A(1)).erased_raw()],
325 &[interner.intern_static(B(0)).erased_raw(), interner.intern_static(B(0)).erased_raw()],
326 &[interner.intern_static(B(1)).erased_raw()],
327 ];
328 common::assert_group_addr_eq(&groups);
329 }
330
331 // Address insdie Interned<'_, T> must be valid while the interner lives.
332 #[test]
333 fn test_fixed_memory_after_huge_number_of_interninig() {
334 const TEST_SIZE_IN_BYTES: isize = 1024 * 1024 * 5; // 5 MB
335
336 let interner = Interner::new();
337
338 let mut remain_bytes = TEST_SIZE_IN_BYTES;
339 let mut interned_usize = Vec::new();
340 for i in 0_usize.. {
341 if remain_bytes < 0 {
342 break;
343 }
344 let value = i;
345 remain_bytes -= size_of_val(&value) as isize;
346
347 let interned = interner.intern_static(value);
348 interned_usize.push(interned);
349 }
350
351 let mut remain_bytes = TEST_SIZE_IN_BYTES;
352 let mut interned_str = Vec::new();
353 for i in 0.. {
354 if remain_bytes < 0 {
355 break;
356 }
357 let value = i.to_string();
358 let value = value.as_str();
359 remain_bytes -= size_of_val(&value) as isize;
360
361 let interned = interner.intern_dropless(value);
362 interned_str.push(interned);
363 }
364
365 // Test will pass if the data have not moved.
366 for (i, interned) in interned_usize.into_iter().enumerate() {
367 assert_eq!(i, **interned)
368 }
369 for (i, interned) in interned_str.into_iter().enumerate() {
370 assert_eq!(&i.to_string(), *interned);
371 }
372 }
373}