Skip to main content

hashcons_arena/
lib.rs

1//! A hash consing arena for efficient interning of values.
2//!
3//! This arena allows you to intern values such that structurally equal values will yield the
4//! same reference, thus saving memory and improving performance in scenarios where many
5//! identical values are used.
6//!
7//! # Features
8//!
9//! - `sync`: Use a `RwLock`, rather than a `RefCell` to implement interior mutability. This
10//! allows the arena to be [`Sync`]. Depends on the `parking_lot` crate.
11//!
12//! # Example
13//!
14//! ```rust,ignore
15//! use hashcons_arena::HashConsArena;
16//!
17//! let arena = HashConsArena::new();
18//! let a = arena.intern("hello");
19//! let b = arena.intern("hello");
20//!
21//! assert!(a == b); // a and b are the same reference
22//! ```
23//!
24//! # Safety
25//!
26//! The crate uses `unsafe` in a number of places, and has been tested using miri.
27//!
28
29#[cfg(not(feature = "sync"))]
30use std::cell::RefCell as RC;
31use std::cmp::Ordering;
32use std::fmt::{self, Debug, Display};
33use std::hash::{BuildHasher, Hash, Hasher};
34use std::mem;
35use std::ops::Deref;
36use std::pin::Pin;
37use std::ptr::NonNull;
38
39use bumpalo::Bump;
40use bumpalo::boxed::Box;
41use fxhash::FxBuildHasher;
42use hashbrown::HashMap;
43#[cfg(feature = "sync")]
44use parking_lot::RwLock as RC;
45
46#[cfg(feature = "sync")]
47mod rc {
48    use super::RC;
49
50    #[inline(always)]
51    pub(crate) fn read_table<T>(table: &RC<T>) -> parking_lot::RwLockReadGuard<'_, T> {
52        table.read()
53    }
54
55    #[inline(always)]
56    pub(crate) fn write_table<T>(table: &RC<T>) -> parking_lot::RwLockWriteGuard<'_, T> {
57        table.write()
58    }
59}
60
61#[cfg(not(feature = "sync"))]
62mod rc {
63    use super::RC;
64
65    #[inline(always)]
66    pub(crate) fn read_table<T>(table: &RC<T>) -> std::cell::Ref<'_, T> {
67        table.borrow()
68    }
69
70    #[inline(always)]
71    pub(crate) fn write_table<T>(table: &RC<T>) -> std::cell::RefMut<'_, T> {
72        table.borrow_mut()
73    }
74}
75
76/// A hash consing arena that allows for efficient interning of values.
77///
78/// ## Caveat(s)
79///
80/// The arena is backed by a [bumpalo](https://crates.io/crates/bumpalo), therefore, objects
81/// allocated via the arena, will not have their [`Drop`] implementation called, without manual
82/// intervention. Thus, it is relatively easy to leak resources. If you need [`Drop`] to be called,
83/// use [`BoxedHashConsArena`] instead.
84///
85pub struct HashConsArena<T> {
86    bump: Bump,
87    table: RC<HashMap<AsVal<NonNull<T>>, (), FxBuildHasher>>,
88}
89
90pub struct HRef<'a, T> {
91    ptr: &'a T,
92}
93
94impl<'a, T> Clone for HRef<'a, T> {
95    fn clone(&self) -> Self {
96        Self { ptr: self.ptr }
97    }
98}
99
100impl<'a, T> Copy for HRef<'a, T> {}
101
102impl<'a, T> HRef<'a, T> {
103    pub(crate) fn new(ptr: &'a T) -> Self {
104        Self { ptr }
105    }
106
107    pub(crate) fn as_ptr(&self) -> *const T {
108        self.ptr as *const T
109    }
110
111    pub fn as_ref(&self) -> &'a T {
112        self.ptr
113    }
114}
115
116impl<'a, T> Deref for HRef<'a, T> {
117    type Target = T;
118
119    fn deref(&self) -> &Self::Target {
120        self.ptr
121    }
122}
123
124impl<'a, T> PartialEq for HRef<'a, T> {
125    fn eq(&self, other: &Self) -> bool {
126        std::ptr::eq(self.ptr, other.ptr)
127    }
128}
129
130impl<'a, T> PartialEq<T> for HRef<'a, T>
131where
132    T: PartialEq,
133{
134    fn eq(&self, other: &T) -> bool {
135        self.as_ref() == other
136    }
137}
138
139impl<'a, T> Eq for HRef<'a, T> {}
140
141impl<'a, T> Hash for HRef<'a, T> {
142    fn hash<H: Hasher>(&self, state: &mut H) {
143        // hash the pointer address, not the value
144        self.as_ptr().hash(state);
145    }
146}
147
148impl<'a, T> PartialOrd for HRef<'a, T> {
149    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
150        Some(self.cmp(other))
151    }
152}
153
154impl<'a, T> Ord for HRef<'a, T> {
155    fn cmp(&self, other: &Self) -> Ordering {
156        self.as_ptr().cmp(&other.as_ptr())
157    }
158}
159
160impl<'a, T> Debug for HRef<'a, T>
161where
162    T: Debug,
163{
164    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
165        f.debug_struct("HRef")
166            .field("ptr", &(self.as_ptr() as usize))
167            .field("val", &self.ptr)
168            .finish()
169    }
170}
171
172impl<'a, T> Display for HRef<'a, T>
173where
174    T: Display,
175{
176    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
177        self.ptr.fmt(f)
178    }
179}
180
181#[repr(transparent)]
182pub struct AsVal<T>(T);
183
184impl<T> PartialEq for AsVal<NonNull<T>>
185where
186    T: PartialEq,
187{
188    fn eq(&self, other: &Self) -> bool {
189        unsafe { self.0.as_ref() == other.0.as_ref() }
190    }
191}
192impl<T> Eq for AsVal<NonNull<T>> where T: Eq {}
193
194impl<T> Hash for AsVal<NonNull<T>>
195where
196    T: Hash,
197{
198    fn hash<H: Hasher>(&self, state: &mut H) {
199        unsafe { self.0.as_ref() }.hash(state);
200    }
201}
202
203impl<T> HashConsArena<T>
204where
205    T: Eq + Hash,
206{
207    /// Create a new `HashConsArena`.
208    pub fn new() -> Self {
209        Self {
210            bump: Bump::new(),
211            table: RC::new(HashMap::<_, _, FxBuildHasher>::default()),
212        }
213    }
214
215    /// Intern a value, returning a reference that's guaranteed to be the same for structurally
216    /// equal values.
217    ///
218    /// # Arguments
219    /// * `value` - The value to intern. It must implement `Eq` and `Hash`.
220    ///
221    /// # Returns
222    /// A `HRef<T>` that points to the interned value. If the value is already interned, it returns
223    /// the existing reference.
224    ///
225    /// # Example
226    ///
227    /// ```rust,ignore
228    /// use hashcons_arena::HashConsArena;
229    ///
230    /// let arena = HashConsArena::new();
231    ///
232    /// let a = arena.intern("hello");
233    /// let b = arena.intern("hello");
234    ///
235    /// assert!(a == b); // a and b are the same reference
236    /// ```
237    ///
238    pub fn intern<'a>(&'a self, value: T) -> HRef<'a, T>
239    where
240        T: Debug,
241    {
242        // check if we already have this value
243        let hash = {
244            let table = rc::read_table(&self.table);
245
246            // compute hash of the value
247            let mut hasher = table.hasher().build_hasher();
248            value.hash(&mut hasher);
249            let hash = hasher.finish();
250
251            if let Some((AsVal(existing), _)) = table
252                .raw_entry()
253                .from_hash(hash, |AsVal(ptr)| unsafe { *ptr.as_ref() == value })
254            {
255                // found
256                return HRef::new(unsafe { existing.as_ref() });
257            }
258
259            hash
260        }; // table borrow ends here
261
262        // not found, allocate new one
263        let allocated = &*self.bump.alloc(value);
264        let ptr = NonNull::from(allocated);
265
266        // add to hash table
267        rc::write_table(&self.table)
268            .raw_entry_mut()
269            .from_hash(hash, |AsVal(ptr)| unsafe { ptr.as_ref() == allocated })
270            .or_insert(AsVal(ptr), ());
271
272        HRef::new(allocated)
273    }
274
275    /// Reset the arena, clearing all interned values.
276    pub fn reset(&mut self) {
277        rc::write_table(&self.table).clear();
278        self.bump.reset();
279    }
280}
281
282/// A version of `HashConsArena` that ensures interned values are dropped.
283pub struct BoxedHashConsArena<T>
284where
285    T: 'static,
286{
287    bump: Bump,
288    table: RC<HashMap<Pin<Box<'static, T>>, (), FxBuildHasher>>,
289}
290
291impl<T> BoxedHashConsArena<T>
292where
293    T: Eq + Hash + 'static,
294{
295    /// Create a new `BoxedHashConsArena`.
296    pub fn new() -> Self {
297        Self {
298            bump: Bump::new(),
299            table: RC::new(HashMap::<_, _, FxBuildHasher>::default()),
300        }
301    }
302
303    /// Intern a value, returning a reference that's guaranteed to be the same for structurally
304    /// equal values.
305    ///
306    /// # Arguments
307    ///
308    /// * `value` - The value to intern. It must implement `Eq` and `Hash`.
309    ///
310    /// # Returns
311    ///
312    /// A `HRef<T>` that points to the interned value. If the value is already interned, it returns
313    /// the existing reference.
314    ///
315    pub fn intern<'a>(&'a self, value: T) -> HRef<'a, T> {
316        // check if we already have this value
317        let hash = {
318            let table = rc::read_table(&self.table);
319
320            // compute hash of the value
321            let mut hasher = table.hasher().build_hasher();
322            value.hash(&mut hasher);
323            let hash = hasher.finish();
324
325            if let Some((existing, _)) = table
326                .raw_entry()
327                .from_hash(hash, |ptr| *ptr.as_ref() == value)
328            {
329                // found - return reference with proper lifetime
330                // safety: the box lives as long as the arena, and we're borrowing
331                // from the arena with lifetime 'a, so this is safe
332                let existing_ref = unsafe { mem::transmute::<&T, &'a T>(existing.deref()) };
333                return HRef::new(existing_ref);
334            }
335
336            hash
337        }; // table borrow ends here
338
339        // not found, allocate new one
340        let allocated = Box::pin_in(value, &self.bump);
341
342        // safety: the box will live as long as the bump allocator, and we ensure
343        // that the bump allocator is never reset while references exist
344        let static_box =
345            unsafe { mem::transmute::<Pin<Box<'_, T>>, Pin<Box<'static, T>>>(allocated) };
346
347        // add to hash table first, then get reference from the stored location
348        let mut table = rc::write_table(&self.table);
349
350        let (stored_box, _) = table
351            .raw_entry_mut()
352            .from_hash(hash, |ptr| *ptr.as_ref() == *static_box.deref())
353            .or_insert(static_box, ());
354
355        // now get the reference from the box in its final storage location (pleasing miri)
356        let allocated_ref = unsafe { mem::transmute::<&T, &'a T>(stored_box.deref()) };
357
358        HRef::new(allocated_ref)
359    }
360
361    /// Reset the arena, clearing all interned values.
362    pub fn reset(&mut self) {
363        rc::write_table(&self.table).clear();
364        self.bump.reset();
365    }
366}
367
368impl<T> Drop for BoxedHashConsArena<T> {
369    fn drop(&mut self) {
370        // this is necessary to ensure that all boxes are dropped before the bump allocator
371        // otherwise we might have dangling references
372        rc::write_table(&self.table).clear();
373    }
374}
375
376#[cfg(test)]
377mod test {
378    use super::*;
379
380    #[test]
381    fn test_interning() {
382        let arena = HashConsArena::new();
383        let a = arena.intern("hello");
384        let b = arena.intern("hello");
385        let c = arena.intern("world");
386
387        assert!(a == b); // a and b are the same reference
388        assert!(a != c); // a and c are different references
389        assert_eq!(a, "hello");
390        assert_eq!(b, "hello");
391        assert_eq!(c, "world");
392    }
393
394    #[test]
395    fn test_multiple_arenas() {
396        let arena1 = HashConsArena::new();
397        let arena2 = HashConsArena::new();
398
399        let a1 = arena1.intern("hello");
400        let a2 = arena2.intern("hello");
401
402        assert!(a1 != a2); // a1 and a2 are different references
403        assert_eq!(a1, "hello");
404        assert_eq!(a2, "hello");
405    }
406
407    #[test]
408    fn test_drop() {
409        use std::sync::Arc;
410        use std::sync::atomic::AtomicUsize;
411
412        let arena = BoxedHashConsArena::new();
413        let drop_ctr = Arc::new(AtomicUsize::new(0));
414
415        struct MyStr {
416            value: String,
417            drop_ctr: Arc<AtomicUsize>,
418        }
419
420        impl MyStr {
421            fn new(value: impl Into<String>, drop_ctr: Arc<AtomicUsize>) -> Self {
422                Self {
423                    value: value.into(),
424                    drop_ctr,
425                }
426            }
427        }
428
429        impl PartialEq for MyStr {
430            fn eq(&self, other: &Self) -> bool {
431                self.value == other.value
432            }
433        }
434
435        impl Eq for MyStr {}
436
437        impl Hash for MyStr {
438            fn hash<H: Hasher>(&self, state: &mut H) {
439                self.value.hash(state);
440            }
441        }
442
443        impl Drop for MyStr {
444            fn drop(&mut self) {
445                self.drop_ctr
446                    .fetch_add(1, std::sync::atomic::Ordering::SeqCst);
447            }
448        }
449
450        let a = arena.intern(MyStr::new("hello", drop_ctr.clone()));
451        let b = arena.intern(MyStr::new("world", drop_ctr.clone()));
452
453        assert!(a != b); // a and b are different references
454
455        // drop the arena, which should clear the table
456        drop(arena);
457
458        // check that the drop counter was incremented for both values
459        assert_eq!(drop_ctr.load(std::sync::atomic::Ordering::SeqCst), 2);
460    }
461
462    #[test]
463    fn test_reset() {
464        let mut arena = HashConsArena::new();
465        let _a = arena.intern("hello");
466        assert_eq!(rc::read_table(&arena.table).len(), 1);
467        arena.reset();
468        assert_eq!(rc::read_table(&arena.table).len(), 0);
469    }
470}