simple_interner/
interner.rs

1use {
2    crate::Interned,
3    std::{
4        borrow::Borrow,
5        collections::hash_map::RandomState,
6        fmt,
7        hash::{BuildHasher, Hash, Hasher},
8        marker::PhantomData,
9        ops::Deref,
10        ptr::NonNull,
11    },
12};
13
14#[cfg(feature = "raw")]
15use hashbrown::hash_map::RawEntryMut;
16#[cfg(feature = "hashbrown")]
17use hashbrown::hash_map::{Entry, HashMap};
18#[cfg(not(feature = "hashbrown"))]
19use std::collections::hash_map::{Entry, HashMap};
20
21#[cfg(not(feature = "parking_lot"))]
22use std::sync::RwLock;
23#[cfg(feature = "parking_lot")]
24use {crate::parking_lot_shim::*, parking_lot::RwLock};
25
26/// A wrapper around box that does not provide &mut access to the pointee and
27/// uses raw-pointer borrowing rules to avoid invalidating extant references.
28///
29/// The resolved reference is guaranteed valid until the PinBox is dropped.
30struct PinBox<T: ?Sized> {
31    ptr: NonNull<T>,
32    _marker: PhantomData<Box<T>>,
33}
34
35impl<T: ?Sized> PinBox<T> {
36    fn new(x: Box<T>) -> Self {
37        Self {
38            ptr: NonNull::new(Box::into_raw(x)).unwrap(),
39            _marker: PhantomData,
40        }
41    }
42
43    #[allow(unsafe_code)]
44    unsafe fn as_ref<'a>(&self) -> &'a T {
45        self.ptr.as_ref()
46    }
47}
48
49impl<T: ?Sized> Drop for PinBox<T> {
50    fn drop(&mut self) {
51        #[allow(unsafe_code)] // SAFETY: PinBox acts like Box.
52        unsafe {
53            Box::from_raw(self.ptr.as_ptr())
54        };
55    }
56}
57
58impl<T: ?Sized> Deref for PinBox<T> {
59    type Target = T;
60    #[allow(unsafe_code)] // SAFETY: PinBox acts like Box.
61    fn deref(&self) -> &T {
62        unsafe { self.as_ref() }
63    }
64}
65
66impl<T: ?Sized + fmt::Debug> fmt::Debug for PinBox<T> {
67    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
68        (**self).fmt(f)
69    }
70}
71
72impl<T: ?Sized + Eq> Eq for PinBox<T> {}
73impl<T: ?Sized + PartialEq> PartialEq for PinBox<T> {
74    fn eq(&self, other: &Self) -> bool {
75        (**self).eq(&**other)
76    }
77}
78impl<T: ?Sized + PartialEq> PartialEq<T> for PinBox<T> {
79    fn eq(&self, other: &T) -> bool {
80        (**self).eq(other)
81    }
82}
83
84impl<T: ?Sized + Ord> Ord for PinBox<T> {
85    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
86        (**self).cmp(&**other)
87    }
88}
89impl<T: ?Sized + PartialOrd> PartialOrd for PinBox<T> {
90    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
91        (**self).partial_cmp(&**other)
92    }
93}
94impl<T: ?Sized + PartialOrd> PartialOrd<T> for PinBox<T> {
95    fn partial_cmp(&self, other: &T) -> Option<std::cmp::Ordering> {
96        (**self).partial_cmp(other)
97    }
98}
99
100impl<T: ?Sized + Hash> Hash for PinBox<T> {
101    fn hash<H: Hasher>(&self, state: &mut H) {
102        (**self).hash(state)
103    }
104}
105
106impl<T: ?Sized> Borrow<T> for PinBox<T> {
107    fn borrow(&self) -> &T {
108        self
109    }
110}
111
112#[allow(unsafe_code)] // SAFETY: PinBox acts like Box.
113unsafe impl<T: ?Sized> Send for PinBox<T> where Box<T>: Send {}
114
115#[allow(unsafe_code)] // SAFETY: PinBox acts like Box.
116unsafe impl<T: ?Sized> Sync for PinBox<T> where Box<T>: Sync {}
117
118/// An interner based on a `HashSet`. See the crate-level docs for more.
119#[derive(Debug)]
120pub struct Interner<T: ?Sized, S = RandomState> {
121    arena: RwLock<HashMap<PinBox<T>, (), S>>,
122}
123
124impl<T: ?Sized, S: Default> Default for Interner<T, S> {
125    fn default() -> Self {
126        Interner {
127            arena: RwLock::default(),
128        }
129    }
130}
131
132impl<T: Eq + Hash + ?Sized, S: BuildHasher> Interner<T, S> {
133    /// Intern an item into the interner.
134    ///
135    /// Takes borrowed or heap-allocated items. If the item has not been
136    /// previously interned, it will be `Into::into`ed a `Box` on the heap and
137    /// cached. Notably, if you give this fn a `String` or `Vec`, the allocation
138    /// will be shrunk to fit.
139    ///
140    /// Note that the interner may need to reallocate to make space for the new
141    /// reference, just the same as a `HashSet` would. This cost is amortized to
142    /// `O(1)` as it is in other standard library collections.
143    ///
144    /// If you have an owned item (especially if it has a cheap transformation
145    /// to `Box`) and no longer need the ownership, pass it in directly.
146    /// Otherwise, pass in a reference.
147    ///
148    /// See `get` for more about the interned symbol.
149    pub fn intern<R>(&self, t: R) -> Interned<'_, T>
150    where
151        R: Borrow<T> + Into<Box<T>>,
152    {
153        let borrowed = t.borrow();
154        if let Some(interned) = self.get(borrowed) {
155            return interned;
156        }
157
158        let mut arena = self
159            .arena
160            .write()
161            .expect("interner lock should not be poisoned");
162
163        // If someone interned the item between the above check and us acquiring
164        // the write lock, this heap allocation isn't necessary. However, this
165        // is expected to be rare, so we don't bother with doing another lookup
166        // before creating the box. Using the raw_entry API could avoid this,
167        // but needs a different call than intern_raw to use the intrinsic
168        // BuildHasher rather than an external one. It's not worth the effort.
169
170        let entry = arena.entry(PinBox::new(t.into()));
171        #[allow(unsafe_code)] // SAFETY: Interned ties the lifetime to the interner.
172        match entry {
173            Entry::Occupied(entry) => Interned(unsafe { entry.key().as_ref() }),
174            Entry::Vacant(entry) => {
175                let interned = Interned(unsafe { entry.key().as_ref() });
176                entry.insert(());
177                interned
178            },
179        }
180    }
181
182    /// Get an interned reference out of this interner.
183    ///
184    /// The returned reference is bound to the lifetime of the borrow used for
185    /// this method. This guarantees that the returned reference will live no
186    /// longer than this interner does.
187    pub fn get(&self, t: &T) -> Option<Interned<'_, T>> {
188        #[allow(unsafe_code)] // SAFETY: Interned ties the lifetime to the interner.
189        self.arena
190            .read()
191            .expect("interner lock should not be poisoned")
192            .get_key_value(t)
193            .map(|(t, _)| Interned(unsafe { t.as_ref() }))
194    }
195}
196
197#[allow(unsafe_code)]
198#[cfg(feature = "raw")]
199impl<T: ?Sized, S> Interner<T, S> {
200    /// Raw interning interface for any `T`.
201    pub fn intern_raw<Q>(
202        &self,
203        it: Q,
204        hash: u64,
205        mut is_match: impl FnMut(&Q, &T) -> bool,
206        do_hash: impl Fn(&T) -> u64,
207        commit: impl FnOnce(Q) -> Box<T>,
208    ) -> Interned<'_, T> {
209        if let Some(interned) = self.get_raw(hash, |t| is_match(&it, t)) {
210            return interned;
211        }
212
213        let mut arena = self
214            .arena
215            .write()
216            .expect("interner lock should not be poisoned");
217
218        match arena.raw_entry_mut().from_hash(hash, |t| is_match(&it, t)) {
219            RawEntryMut::Occupied(entry) => Interned(unsafe { entry.key().as_ref() }),
220            RawEntryMut::Vacant(entry) => {
221                let boxed = PinBox::new(commit(it));
222                let interned = Interned(unsafe { boxed.as_ref() });
223                entry.insert_with_hasher(hash, boxed, (), |t| do_hash(t));
224                interned
225            },
226        }
227    }
228
229    /// Raw interned reference lookup.
230    pub fn get_raw(
231        &self,
232        hash: u64,
233        mut is_match: impl FnMut(&T) -> bool,
234    ) -> Option<Interned<'_, T>> {
235        self.arena
236            .read()
237            .expect("interner lock should not be poisoned")
238            .raw_entry()
239            .from_hash(hash, |t| is_match(t))
240            .map(|(t, _)| Interned(unsafe { t.as_ref() }))
241    }
242}
243
244impl<T: ?Sized> Interner<T> {
245    /// Create an empty interner.
246    ///
247    /// The backing set is initially created with a capacity of 0,
248    /// so it will not allocate until it is first inserted into.
249    pub fn new() -> Self {
250        Interner {
251            arena: RwLock::new(HashMap::default()),
252        }
253    }
254
255    /// Create an empty interner with the specified capacity.
256    ///
257    /// The interner will be able to hold at least `capacity` items without reallocating.
258    /// If `capacity` is 0, the interner will not initially allocate.
259    pub fn with_capacity(capacity: usize) -> Self {
260        Interner {
261            arena: RwLock::new(HashMap::with_capacity_and_hasher(
262                capacity,
263                RandomState::default(),
264            )),
265        }
266    }
267}
268
269/// Constructors to control the backing `HashSet`'s hash function
270impl<T: ?Sized, H: BuildHasher> Interner<T, H> {
271    #[cfg(not(feature = "hashbrown"))]
272    /// Create an empty interner which will use the given hasher to hash the values.
273    ///
274    /// The interner is also created with the default capacity.
275    pub fn with_hasher(hasher: H) -> Self {
276        Interner {
277            arena: RwLock::new(HashMap::with_hasher(hasher)),
278        }
279    }
280
281    #[cfg(feature = "hashbrown")]
282    /// Create an empty interner which will use the given hasher to hash the values.
283    ///
284    /// The interner is also created with the default capacity.
285    pub const fn with_hasher(hasher: H) -> Self {
286        Interner {
287            arena: RwLock::new(HashMap::with_hasher(hasher)),
288        }
289    }
290
291    /// Create an empty interner with the specified capacity, using `hasher` to hash the values.
292    ///
293    /// The interner will be able to hold at least `capacity` items without reallocating.
294    /// If `capacity` is 0, the interner will not initially allocate.
295    pub fn with_capacity_and_hasher(capacity: usize, hasher: H) -> Self {
296        Interner {
297            arena: RwLock::new(HashMap::with_capacity_and_hasher(capacity, hasher)),
298        }
299    }
300}