any_intern/lib.rs
1#![doc = include_str!("../README.md")]
2
3mod any;
4mod common;
5mod dropless;
6mod typed;
7
8// === Re-exports ===
9
10pub use any::{AnyArena, AnyInternSet, AnyInterner};
11pub use common::{Interned, RawInterned, UnsafeLock};
12pub use dropless::{Dropless, DroplessInternSet, DroplessInterner};
13pub use typed::TypedArena;
14
15use std::{
16 any::TypeId,
17 borrow,
18 collections::HashMap,
19 fmt::{self, Display},
20 hash::{BuildHasher, Hash},
21};
22
23/// A generic interner for storing and deduplicating values of various types.
24///
25/// The `Interner` provides a mechanism to store values in a way that ensures each unique value is
26/// stored only once. It supports interning both static types and dropless types, allowing efficient
27/// memory usage and fast lookups.
28///
29/// Interning is useful when you need to store many instances of the same value and want to avoid
30/// duplication. Instead of storing multiple copies of the same value, the `Interner` ensures that
31/// only one instance of each unique value exists, and all references point to that instance.
32///
33/// # Examples
34///
35/// ```
36/// use any_intern::Interner;
37///
38/// #[derive(PartialEq, Eq, Hash, Debug)]
39/// struct A(u32);
40///
41/// #[derive(PartialEq, Eq, Hash, Debug)]
42/// struct B(String);
43///
44/// let interner = Interner::new();
45///
46/// // Interning integers
47/// let int1 = interner.intern_static(42_u32);
48/// let int2 = interner.intern_static(42_u32);
49/// assert_eq!(int1, int2); // Same value, same reference
50///
51/// // Interning custom structs
52/// let a1 = interner.intern_static(A(1));
53/// let a2 = interner.intern_static(A(1));
54/// assert_eq!(a1, a2); // Same value, same reference
55///
56/// // Interning strings
57/// let b1 = interner.intern_dropless(&*String::from("hello"));
58/// let b2 = interner.intern_dropless(&*String::from("hello"));
59/// assert_eq!(b1, b2); // Same value, same reference
60/// ```
61pub struct Interner<S = fxhash::FxBuildHasher> {
62 /// Intern storage for static types.
63 pub anys: UnsafeLock<HashMap<TypeId, AnyInternSet, S>>,
64
65 /// Intern storage for dropless types.
66 pub dropless: DroplessInterner,
67}
68
69impl Interner {
70 pub fn new() -> Self {
71 Self::default()
72 }
73}
74
75impl<S: BuildHasher> Interner<S> {
76 /// Stores a value in the interner, returning a reference to the interned value.
77 ///
78 /// This method inserts the given value into the interner if it does not already exist. If the
79 /// value already exists, a reference to the existing value is returned.
80 ///
81 /// # Examples
82 ///
83 /// ```
84 /// use any_intern::Interner;
85 ///
86 /// #[derive(PartialEq, Eq, Hash, Debug)]
87 /// struct A(u32);
88 ///
89 /// #[derive(PartialEq, Eq, Hash, Debug)]
90 /// struct B(String);
91 ///
92 /// let interner = Interner::new();
93 ///
94 /// // Interning integers
95 /// let int1 = interner.intern_static(42_u32);
96 /// let int2 = interner.intern_static(42_u32);
97 /// assert_eq!(int1, int2); // Same value, same reference
98 /// assert_eq!(int1.raw().as_ptr(), int2.raw().as_ptr());
99 ///
100 /// // Interning custom structs
101 /// let a1 = interner.intern_static(A(1));
102 /// let a2 = interner.intern_static(A(1));
103 /// assert_eq!(a1, a2); // Same value, same reference
104 /// assert_eq!(a1.raw().as_ptr(), a2.raw().as_ptr());
105 ///
106 /// // Interning strings
107 /// let b1 = interner.intern_static(B("hello".to_string()));
108 /// let b2 = interner.intern_static(B("hello".to_string()));
109 /// assert_eq!(b1, b2); // Same value, same reference
110 /// assert_eq!(b1.raw().as_ptr(), b2.raw().as_ptr());
111 ///
112 /// // Interning different values
113 /// let b3 = interner.intern_static(B("world".to_string()));
114 /// assert_ne!(b1, b3); // Different values, different references
115 /// ```
116 pub fn intern_static<K: Hash + Eq + 'static>(&self, value: K) -> Interned<'_, K> {
117 self.with_any_set::<K, _, _>(|set| unsafe {
118 // Safety: Type `K` is consistent and correct.
119 set.intern(value)
120 })
121 }
122
123 /// Stores a value in the interner, creating it only if it does not already exist.
124 ///
125 /// This method allows you to provide a key and a closure to generate the value. If the key
126 /// already exists in the interner, the closure is not called, and a reference to the existing
127 /// value is returned. If the key does not exist, the closure is called to create the value,
128 /// which is then stored in the interner.
129 ///
130 /// This method is useful when the value is expensive to compute, as it avoids unnecessary
131 /// computation if the value already exists.
132 ///
133 /// # Examples
134 ///
135 /// ```
136 /// use any_intern::Interner;
137 ///
138 /// #[derive(PartialEq, Eq, Hash, Debug)]
139 /// struct A(u32);
140 ///
141 /// impl std::borrow::Borrow<u32> for A {
142 /// fn borrow(&self) -> &u32 {
143 /// &self.0
144 /// }
145 /// }
146 ///
147 /// let interner = Interner::new();
148 ///
149 /// let a = interner.intern_static_with(&42, || A(42));
150 /// assert_eq!(interner.len(), 1);
151 /// assert_eq!(*a, A(42));
152 ///
153 /// let b = interner.intern_static_with(&42, || A(99)); // Closure is not called
154 /// assert_eq!(interner.len(), 1);
155 /// assert_eq!(*b, A(42));
156 ///
157 /// let c = interner.intern_static_with(&43, || A(43));
158 /// assert_eq!(interner.len(), 2);
159 /// assert_eq!(*c, A(43));
160 /// ```
161 pub fn intern_static_with<'a, K, Q, F>(&'a self, key: &Q, make_value: F) -> Interned<'a, K>
162 where
163 K: borrow::Borrow<Q> + 'static,
164 Q: Hash + Eq + ?Sized,
165 F: FnOnce() -> K,
166 {
167 self.with_any_set::<K, _, _>(|set| unsafe {
168 // Safety: Type `K` is consistent and correct.
169 set.intern_with(key, make_value)
170 })
171 }
172
173 /// Retrieves a reference to a value in the interner based on the provided key.
174 ///
175 /// This method checks if a value corresponding to the given key exists in the interner. If it
176 /// exists, a reference to the interned value is returned. Otherwise, `None` is returned.
177 ///
178 /// # Examples
179 ///
180 /// ```
181 /// use any_intern::Interner;
182 ///
183 /// let interner = Interner::new();
184 /// interner.intern_static(42_u32);
185 ///
186 /// assert_eq!(*interner.get::<u32, _>(&42_u32).unwrap(), 42);
187 /// assert!(interner.get::<u32, _>(&99_u32).is_none());
188 /// ```
189 pub fn get<K, Q>(&self, key: &Q) -> Option<Interned<'_, K>>
190 where
191 K: borrow::Borrow<Q> + 'static,
192 Q: Hash + Eq + ?Sized,
193 {
194 self.with_any_set::<K, _, _>(|set| unsafe {
195 // Safety: Type `K` is consistent and correct.
196 set.get(key)
197 })
198 }
199
200 /// Stores the given dropless value in the interner then returns reference to the value if the
201 /// interner doesn't contain the same value yet.
202 ///
203 /// If the same value exists in the interner, reference to the existing value is returned.
204 ///
205 /// This method does not take the value's ownership. Instead, it copies the value into the
206 /// interner's memory, then returns reference to that.
207 ///
208 /// # Eaxmples
209 ///
210 /// ```
211 /// use any_intern::Interner;
212 ///
213 /// let interner = Interner::new();
214 /// let a = interner.intern_dropless("hello");
215 /// let b = interner.intern_dropless(*Box::new("hello"));
216 /// let c = interner.intern_dropless("hi");
217 /// assert_eq!(a, b);
218 /// assert_ne!(a, c);
219 /// ```
220 pub fn intern_dropless<K: Dropless + ?Sized>(&self, value: &K) -> Interned<'_, K> {
221 self.dropless.intern(value)
222 }
223
224 /// Stores a value in the interner as a formatted string through [`Display`] then returns
225 /// reference to the value if the interner doesn't contain the formatted string yet.
226 ///
227 /// If the same string exists in the interner, reference to the existing string is returned.
228 ///
229 /// This method provides a buffer for making string. This will be benefit in terms of
230 /// performance when you frequently make `String` via something like `to_string()` by exploiting
231 /// chunk memory.
232 ///
233 /// If you give insufficient `upper_size`, then error is returned.
234 ///
235 /// # Examples
236 ///
237 /// ```
238 /// use any_intern::Interner;
239 ///
240 /// let interner = Interner::new();
241 /// let value = 42;
242 /// let interned = interner.intern_formatted_str(&value, 10).unwrap();
243 ///
244 /// assert_eq!(&*interned, "42");
245 /// ```
246 pub fn intern_formatted_str<K: Display + ?Sized>(
247 &self,
248 value: &K,
249 upper_size: usize,
250 ) -> Result<Interned<'_, str>, fmt::Error> {
251 self.dropless.intern_formatted_str(value, upper_size)
252 }
253
254 /// Retrieves a reference to a value in the interner based on the provided key.
255 ///
256 /// This method checks if a value corresponding to the given key exists in the interner. If it
257 /// exists, a reference to the interned value is returned. Otherwise, `None` is returned.
258 ///
259 /// # Eaxmples
260 ///
261 /// ```
262 /// use any_intern::Interner;
263 ///
264 /// let interner = Interner::new();
265 /// interner.intern_dropless("hello");
266 ///
267 /// assert_eq!(interner.get_dropless("hello").as_deref(), Some("hello"));
268 /// assert!(interner.get_dropless("hi").is_none());
269 /// ```
270 pub fn get_dropless<K: Dropless + ?Sized>(&self, value: &K) -> Option<Interned<'_, K>> {
271 self.dropless.get(value)
272 }
273
274 /// Returns number of values the interner contains.
275 pub fn len(&self) -> usize {
276 self.with_any_sets(|sets| sets.values().map(AnyInternSet::len).sum::<usize>())
277 + self.dropless.len()
278 }
279
280 /// Returns true if the interner is empty.
281 pub fn is_empty(&self) -> bool {
282 self.len() == 0
283 }
284
285 /// Removes all values in the interner.
286 ///
287 /// Although the interner support interior mutability, clear method requires mutable access
288 /// to the interner to invalidate all [`Interned`]s referencing the interner.
289 pub fn clear(&mut self) {
290 self.with_any_sets(|sets| {
291 for set in sets.values_mut() {
292 set.clear();
293 }
294 });
295 self.dropless.clear();
296 }
297
298 /// * f - Its argument is guaranteed to be a set for the type `K`.
299 fn with_any_set<'this, K, F, R>(&'this self, f: F) -> R
300 where
301 K: 'static,
302 F: FnOnce(&'this mut AnyInternSet) -> R,
303 R: 'this,
304 {
305 self.with_any_sets(|sets| {
306 let set = sets
307 .entry(TypeId::of::<K>())
308 .or_insert_with(|| AnyInternSet::of::<K>());
309 f(set)
310 })
311 }
312
313 fn with_any_sets<'this, F, R>(&self, f: F) -> R
314 where
315 F: FnOnce(&'this mut HashMap<TypeId, AnyInternSet, S>) -> R,
316 R: 'this,
317 S: 'this,
318 {
319 // Safety: Mutex unlocking is paired with the locking.
320 unsafe {
321 let sets = self.anys.lock().as_mut();
322 let ret = f(sets);
323 self.anys.unlock();
324 ret
325 }
326 }
327}
328
329impl<S: Default> Default for Interner<S> {
330 fn default() -> Self {
331 // Safety: Only one instance
332 let anys = unsafe { UnsafeLock::new(HashMap::default()) };
333 Self {
334 anys,
335 dropless: DroplessInterner::default(),
336 }
337 }
338}
339
340#[cfg(test)]
341mod tests {
342 use super::*;
343 use crate::common::{self, RawInterned};
344
345 #[test]
346 #[rustfmt::skip]
347 fn test_interner_various_types() {
348 #[derive(PartialEq, Eq, Hash)] struct A(i32);
349 #[derive(PartialEq, Eq, Hash)] struct B(i32);
350
351 let interner = Interner::new();
352
353 let groups: [&[RawInterned]; _] = [
354 &[interner.intern_static(A(0)).erased_raw(), interner.intern_static(A(0)).erased_raw()],
355 &[interner.intern_static(A(1)).erased_raw()],
356 &[interner.intern_static(B(0)).erased_raw(), interner.intern_static(B(0)).erased_raw()],
357 &[interner.intern_static(B(1)).erased_raw()],
358 ];
359 common::assert_group_addr_eq(&groups);
360 }
361
362 // Address insdie Interned<'_, T> must be valid while the interner lives.
363 #[test]
364 fn test_fixed_memory_after_huge_number_of_interninig() {
365 const TEST_SIZE_IN_BYTES: isize = 1024 * 1024 * 5; // 5 MB
366
367 let interner = Interner::new();
368
369 let mut remain_bytes = TEST_SIZE_IN_BYTES;
370 let mut interned_usize = Vec::new();
371 for i in 0_usize.. {
372 if remain_bytes < 0 {
373 break;
374 }
375 let value = i;
376 remain_bytes -= size_of_val(&value) as isize;
377
378 let interned = interner.intern_static(value);
379 interned_usize.push(interned);
380 }
381
382 let mut remain_bytes = TEST_SIZE_IN_BYTES;
383 let mut interned_str = Vec::new();
384 for i in 0.. {
385 if remain_bytes < 0 {
386 break;
387 }
388 let value = i.to_string();
389 let value = value.as_str();
390 remain_bytes -= size_of_val(&value) as isize;
391
392 let interned = interner.intern_dropless(value);
393 interned_str.push(interned);
394 }
395
396 // Test will pass if the data have not moved.
397 for (i, interned) in interned_usize.into_iter().enumerate() {
398 assert_eq!(i, *interned)
399 }
400 for (i, interned) in interned_str.into_iter().enumerate() {
401 assert_eq!(*i.to_string(), *interned);
402 }
403 }
404}