// intern_arc/hash.rs

/*
 * Copyright 2021 Actyx AG
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
use crate::{
    loom::*,
    ref_count::{Interned, Interner},
};
use std::{
    borrow::Borrow,
    cmp::Ordering,
    collections::HashSet,
    fmt::{Debug, Display, Formatter, Pointer},
    hash::Hasher,
    ops::Deref,
    sync::Arc,
};

/// Interner for hashable values
///
/// The interner is cheaply cloneable by virtue of keeping the underlying storage
/// in an [`Arc`](https://doc.rust-lang.org/std/sync/struct.Arc.html). Once the last
/// reference to this interner is dropped, it will clear its backing storage and
/// release all references to the interned values it has created that are still live.
/// Those values remain fully operational until dropped. Memory for the values
/// themselves is freed for each value individually once its last reference is dropped.
/// The interner’s ArcInner memory will only be deallocated once the last interned
/// value has been dropped (this is less than hundred bytes).
pub struct HashInterner<T: ?Sized + Eq + std::hash::Hash> {
    // shared backing storage; cloning the interner only bumps this Arc's refcount
    inner: Arc<Hash<T>>,
}

44impl<T: ?Sized + Eq + std::hash::Hash> Clone for HashInterner<T> {
45    fn clone(&self) -> Self {
46        Self {
47            inner: self.inner.clone(),
48        }
49    }
50}
51
/// Shared backing storage of a [`HashInterner`]: a lock-protected set of the
/// currently live interned values.
#[repr(C)]
pub struct Hash<T: ?Sized + Eq + std::hash::Hash> {
    set: RwLock<HashSet<InternedHash<T>>>,
}

// Fix: this impl referred to `Inner<T>`, a type that does not exist in this file
// (the storage struct is named `Hash`), and its generics did not repeat the
// struct's bounds, which rustc rejects for `Drop` impls (E0367). Since it is
// only compiled under `cfg(loom)`, the breakage was latent in normal builds.
#[cfg(loom)]
impl<T: ?Sized + Eq + std::hash::Hash> Drop for Hash<T> {
    fn drop(&mut self) {
        // This is needed to “see” interned values added from other threads in loom
        // (since loom doesn’t provide Weak and therefore we cannot use its Arc).
        self.set.read();
    }
}

// SAFETY: Hash<T> only holds a lock-protected set of interned values, so it may be
// sent/shared across threads whenever T itself is Sync + Send.
// NOTE(review): these manual impls are presumably needed because InternedHash wraps
// a raw-pointer-based Interned, which suppresses the auto traits — confirm against
// ref_count.rs.
unsafe impl<T: ?Sized + Eq + std::hash::Hash + Sync + Send> Send for Hash<T> {}
unsafe impl<T: ?Sized + Eq + std::hash::Hash + Sync + Send> Sync for Hash<T> {}

impl<T: ?Sized + Eq + std::hash::Hash> Interner for Hash<T> {
    type T = T;

    /// Attempt to remove `value` from the backing set while holding the write lock.
    ///
    /// NOTE(review): based on the visible logic, the returned tuple appears to mean
    /// (removal decided, reference the set was holding, to be dropped by the caller);
    /// confirm the exact contract against the `Interner` trait in ref_count.rs.
    fn remove(&self, value: &Interned<Self>) -> (bool, Option<Interned<Self>>) {
        // reinterpret the Interned as the set's key type (repr(transparent) cast)
        let value = cast(value);
        let mut set = self.set.write();
        #[cfg(loom)]
        let mut set = set.unwrap();
        if let Some(i) = set.take(value) {
            if i.ref_count() == 1 {
                // the set held the only remaining reference: hand it back for final drop
                (true, Some(i.0))
            } else {
                // someone re-acquired the value concurrently — keep it interned
                set.insert(i);
                (false, None)
            }
        } else {
            // not present (already removed); nothing for the caller to drop
            (true, None)
        }
    }
}

impl<T: ?Sized + Eq + std::hash::Hash> HashInterner<T> {
    /// Create a new, empty interner.
    pub fn new() -> Self {
        Self {
            inner: Arc::new(Hash {
                set: RwLock::new(HashSet::new()),
            }),
        }
    }

    /// Returns the number of objects currently kept in this interner.
    pub fn len(&self) -> usize {
        let set = self.inner.set.read();
        #[cfg(loom)]
        let set = set.unwrap();
        set.len()
    }

    /// Returns `true` when this interner doesn’t hold any values.
    pub fn is_empty(&self) -> bool {
        let set = self.inner.set.read();
        #[cfg(loom)]
        let set = set.unwrap();
        set.is_empty()
    }

    /// Common interning path: return the existing entry equal to `value`, or
    /// materialize a new one via the `intern` closure and publish it in the set.
    fn intern<U, F>(&self, value: U, intern: F) -> InternedHash<T>
    where
        F: FnOnce(U) -> InternedHash<T>,
        U: Borrow<T>,
    {
        // fast path: probe under a read lock (upgradable outside loom, so we can
        // promote it without releasing)
        #[cfg(not(loom))]
        let set = self.inner.set.upgradable_read();
        #[cfg(loom)]
        let set = self.inner.set.read().unwrap();
        if let Some(entry) = set.get(value.borrow()) {
            return entry.clone();
        }
        #[cfg(not(loom))]
        let mut set = RwLockUpgradableReadGuard::upgrade(set);
        #[cfg(loom)]
        let mut set = {
            // loom has no upgradable locks: release the read lock and reacquire for writing
            drop(set);
            self.inner.set.write().unwrap()
        };
        // check again with write lock to guard against concurrent insertion
        if let Some(entry) = set.get(value.borrow()) {
            return entry.clone();
        }
        let mut ret = intern(value);
        // presumably wires the fresh value back to this interner so its final drop
        // can call Interner::remove — confirm make_hot's contract in ref_count.rs
        ret.0.make_hot(&self.inner);
        set.insert(ret.clone());
        ret
    }

    /// Intern a value from a shared reference by allocating new memory for it.
    ///
    /// ```
    /// use intern_arc::{HashInterner, InternedHash};
    ///
    /// let strings = HashInterner::<str>::new();
    /// let i: InternedHash<str> = strings.intern_ref("hello world!");
    /// ```
    pub fn intern_ref(&self, value: &T) -> InternedHash<T>
    where
        T: ToOwned,
        T::Owned: Into<Box<T>>,
    {
        self.intern(value, |v| {
            InternedHash(Interned::from_box(v.to_owned().into()))
        })
    }

    /// Intern a value from an owned reference without allocating new memory for it.
    ///
    /// ```
    /// use intern_arc::{HashInterner, InternedHash};
    ///
    /// let strings = HashInterner::<str>::new();
    /// let hello: Box<str> = "hello world!".into();
    /// let i: InternedHash<str> = strings.intern_box(hello);
    /// ```
    /// (This also works nicely with a `String` that can be turned `.into()` a `Box`.)
    pub fn intern_box(&self, value: Box<T>) -> InternedHash<T> {
        self.intern(value, |v| InternedHash(Interned::from_box(v)))
    }

    /// Intern a sized value, allocating heap memory for it.
    ///
    /// ```
    /// use intern_arc::{HashInterner, InternedHash};
    ///
    /// let arrays = HashInterner::<[u8; 1000]>::new();
    /// let i: InternedHash<[u8; 1000]> = arrays.intern_sized([0; 1000]);
    /// ```
    pub fn intern_sized(&self, value: T) -> InternedHash<T>
    where
        T: Sized,
    {
        self.intern(value, |v| InternedHash(Interned::from_sized(v)))
    }
}

191impl<T: ?Sized + Eq + std::hash::Hash> Default for HashInterner<T> {
192    fn default() -> Self {
193        Self::new()
194    }
195}
196
/// An interned value
///
/// This type works very similar to an [`Arc`](https://doc.rust-lang.org/std/sync/struct.Arc.html)
/// with the difference that it has no concept of weak references. They are not needed because
/// **interned values must not be modified**, so reference cycles cannot be constructed. One
/// reference is held by the interner that created this value as long as that interner lives.
///
/// Keeping interned values around does not keep the interner alive: once the last reference to
/// the interner is dropped, it will release its existing references to interned values by
/// dropping its set implementation. Only the direct allocation size of the set implementation
/// remains allocated (but no longer functional) until the last weak reference to the interner
/// goes away — each interned value keeps one such weak reference to its interner.
#[repr(transparent)] // this is important to cast references (see `cast` below)
pub struct InternedHash<T: ?Sized + Eq + std::hash::Hash>(Interned<Hash<T>>);

impl<T: ?Sized + Eq + std::hash::Hash> InternedHash<T> {
    /// Obtain current number of references, including this one.
    ///
    /// The value will always be at least 1. If the value is 1, this means that the interner
    /// which produced this reference has been dropped; in this case you are still free to
    /// use this reference in any way you like.
    pub fn ref_count(&self) -> u32 {
        // delegate to the refcount stored in the shared Interned allocation
        self.0.ref_count()
    }
}

223impl<T: ?Sized + Eq + std::hash::Hash> Clone for InternedHash<T> {
224    fn clone(&self) -> Self {
225        Self(self.0.clone())
226    }
227}
228
/// Reinterpret a reference to the raw `Interned` as the public wrapper type.
fn cast<T: ?Sized + Eq + std::hash::Hash>(i: &Interned<Hash<T>>) -> &InternedHash<T> {
    // SAFETY: since the memory representation of InternedHash<T> is exactly the same as
    // Interned<Hash<T>> (InternedHash is #[repr(transparent)] over it), we can turn a
    // pointer to one into a pointer to the other
    unsafe { &*(i as *const _ as *const InternedHash<T>) }
}

235impl<T: ?Sized + Eq + std::hash::Hash> PartialEq for InternedHash<T>
236where
237    T: PartialEq,
238{
239    fn eq(&self, other: &Self) -> bool {
240        // equal pointer means same interner and same contents
241        self.0 == other.0 || self.deref().eq(other.deref())
242    }
243}
244impl<T: ?Sized + Eq + std::hash::Hash> Eq for InternedHash<T> where T: Eq {}
245
246impl<T: ?Sized + Eq + std::hash::Hash> PartialOrd for InternedHash<T>
247where
248    T: PartialOrd,
249{
250    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
251        if self.0 == other.0 {
252            return Some(Ordering::Equal);
253        }
254        self.deref().partial_cmp(other.deref())
255    }
256}
257impl<T: ?Sized + Eq + std::hash::Hash> Ord for InternedHash<T>
258where
259    T: Ord,
260{
261    fn cmp(&self, other: &Self) -> Ordering {
262        if self.0 == other.0 {
263            return Ordering::Equal;
264        }
265        self.deref().cmp(other.deref())
266    }
267}
268
269impl<T: ?Sized + Eq + std::hash::Hash> std::hash::Hash for InternedHash<T>
270where
271    T: std::hash::Hash,
272{
273    fn hash<H: Hasher>(&self, state: &mut H) {
274        self.deref().hash(state)
275    }
276}
277
278impl<T: ?Sized + Eq + std::hash::Hash> Borrow<T> for InternedHash<T> {
279    fn borrow(&self) -> &T {
280        self.deref()
281    }
282}
283
284impl<T: ?Sized + Eq + std::hash::Hash> Deref for InternedHash<T> {
285    type Target = T;
286
287    fn deref(&self) -> &Self::Target {
288        self.0.deref()
289    }
290}
291
292impl<T: ?Sized + Eq + std::hash::Hash> AsRef<T> for InternedHash<T> {
293    fn as_ref(&self) -> &T {
294        self.deref()
295    }
296}
297
298impl<T: ?Sized + Eq + std::hash::Hash> Debug for InternedHash<T>
299where
300    T: Debug,
301{
302    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
303        write!(f, "Interned({:?})", &**self)
304    }
305}
306
307impl<T: ?Sized + Eq + std::hash::Hash> Display for InternedHash<T>
308where
309    T: Display,
310{
311    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
312        self.deref().fmt(f)
313    }
314}
315
316impl<T: ?Sized + Eq + std::hash::Hash> Pointer for InternedHash<T> {
317    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
318        Pointer::fmt(&(&**self as *const T), f)
319    }
320}
321
#[cfg(all(test, not(loom)))]
mod tests {
    use crate::OrdInterner;

    /// Two clones of one interned value must report the same address via `{:p}`.
    #[test]
    fn pointer() {
        let interner = OrdInterner::new();
        let first = interner.intern_sized(42);
        let second = first.clone();
        assert_eq!(format!("{:p}", first), format!("{:p}", second));
    }
}

/// Guards the doc claim that the interner's backing storage stays small
/// (“less than hundred bytes”).
#[test]
fn size() {
    let bytes = std::mem::size_of::<Hash<()>>();
    assert!(bytes < 100, "too big: {}", bytes);
}

/// `Debug` output wraps the value's own debug form in `Interned(..)`.
#[test]
fn debug() {
    let interner = HashInterner::new();
    let interned = interner.intern_ref("value");
    assert_eq!(format!("{interned:?}"), r#"Interned("value")"#);
}

#[cfg(all(test, loom))]
mod tests {
    use super::*;
    use ::loom::{model, thread::spawn};

    /// Read the (strong, weak) counts out of a `Weak`, even after the strong
    /// count has reached zero.
    fn counts<T>(weak: Weak<T>) -> (usize, usize) {
        // this is unfortunate: Arc does not allow querying weak_count once strong_count is zero
        // NOTE(review): this reinterprets the Weak as a pointer into the ArcInner and reads
        // its first two words as the counts — it relies on the private ArcInner layout and
        // is not guaranteed by std; confirm it still holds for the toolchain in use.
        unsafe {
            let ptr = &weak as *const _ as *const *const (usize, usize);
            **ptr
        }
    }

    /// Interner and last value dropped on two racing threads: afterwards the
    /// storage must be gone (strong == 0) with only the probe Weak left (weak == 1).
    #[test]
    fn drop_interner() {
        model(|| {
            let i = HashInterner::new();
            let i2 = Arc::downgrade(&i.inner);

            let n = i.intern_box(42.into());

            let h = spawn(move || drop(i));
            let h2 = spawn(move || drop(n));

            h.join().unwrap();
            h2.join().unwrap();

            assert_eq!(counts(i2), (0, 1));
        })
    }

    /// Two clones of the same interned value racing to drop after the interner
    /// itself is already gone.
    #[test]
    fn drop_two_external() {
        model(|| {
            let i = HashInterner::new();
            let i2 = Arc::downgrade(&i.inner);

            let n = i.intern_box(42.into());
            let n2 = n.clone();
            drop(i);

            let h = spawn(move || drop(n));
            let h2 = spawn(move || drop(n2));

            h.join().unwrap();
            h2.join().unwrap();

            assert_eq!(counts(i2), (0, 1));
        })
    }

    /// Drop of the last external reference racing against a re-intern of the
    /// same value (the second thread drops interner and result on scope exit).
    #[test]
    fn drop_against_intern() {
        model(|| {
            let i = HashInterner::new();
            let i2 = Arc::downgrade(&i.inner);

            let n = i.intern_box(42.into());

            let h1 = spawn(move || drop(n));
            let h2 = spawn(move || i.intern_box(42.into()));

            h1.join().unwrap();
            h2.join().unwrap();

            assert_eq!(counts(i2), (0, 1));
        })
    }

    /// Three-way race: drop a value, re-intern the same value, and drop a clone
    /// of the interner, all on separate threads.
    #[test]
    fn drop_against_intern_and_interner() {
        model(|| {
            let i = HashInterner::new();
            let i2 = Arc::downgrade(&i.inner);

            let n = i.intern_box(42.into());
            let ii = i.clone();

            let h1 = spawn(move || drop(n));
            let h2 = spawn(move || i.intern_box(42.into()));
            let h3 = spawn(move || drop(ii));

            h1.join().unwrap();
            h2.join().unwrap();
            h3.join().unwrap();

            assert_eq!(counts(i2), (0, 1));
        })
    }
}