// intern_arc/hash.rs

1/*
2 * Copyright 2021 Actyx AG
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16use crate::{
17    loom::*,
18    ref_count::{Interned, Interner},
19};
20use std::{
21    borrow::Borrow,
22    cmp::Ordering,
23    collections::HashSet,
24    fmt::{Debug, Display, Formatter, Pointer},
25    hash::Hasher,
26    ops::Deref,
27    sync::Arc,
28};
29
/// Interner for hashable values
///
/// The interner is cheaply cloneable by virtue of keeping the underlying storage
/// in an [`Arc`](https://doc.rust-lang.org/std/sync/struct.Arc.html). Once the last
/// reference to this interner is dropped, it will clear its backing storage and
/// release all references to the interned values it has created that are still live.
/// Those values remain fully operational until dropped. Memory for the values
/// themselves is freed for each value individually once its last reference is dropped.
/// The interner’s ArcInner memory will only be deallocated once the last interned
/// value has been dropped (this is less than hundred bytes).
pub struct HashInterner<T: ?Sized + Eq + std::hash::Hash> {
    // Shared backing storage; cloning the interner only bumps this Arc's refcount.
    inner: Arc<Hash<T>>,
}
43
44impl<T: ?Sized + Eq + std::hash::Hash> Clone for HashInterner<T> {
45    fn clone(&self) -> Self {
46        Self {
47            inner: self.inner.clone(),
48        }
49    }
50}
51
// NOTE(review): `#[repr(C)]` presumably pins the layout of this type for the
// raw-pointer manipulation done elsewhere in the crate — confirm before removing.
#[repr(C)]
pub struct Hash<T: ?Sized + Eq + std::hash::Hash> {
    // All interned values currently alive; each entry holds one reference.
    set: RwLock<HashSet<InternedHash<T>>>,
}
56
#[cfg(loom)]
impl<T: ?Sized + Eq + std::hash::Hash> Drop for Hash<T> {
    // Bug fix: this impl was written for `Inner<T>`, a type that does not exist
    // in this module (the struct is `Hash<T>`), so the loom build could not
    // compile. A `Drop` impl must also carry exactly the same bounds as the
    // type definition (rustc error E0367), hence the full bound list here.
    fn drop(&mut self) {
        // This is needed to “see” interned values added from other threads in loom
        // (since loom doesn’t provide Weak and therefore we cannot use its Arc).
        // The returned guard/LockResult is intentionally discarded.
        let _ = self.set.read();
    }
}
65
// SAFETY: `Hash<T>` only holds the lock-protected set of interned values, so it
// may be sent/shared across threads whenever `T` itself is `Send + Sync`.
// NOTE(review): presumably required because the custom `RwLock`/`Interned` types
// do not derive these auto traits — confirm against `loom.rs`/`ref_count.rs`.
unsafe impl<T: ?Sized + Eq + std::hash::Hash + Sync + Send> Send for Hash<T> {}
unsafe impl<T: ?Sized + Eq + std::hash::Hash + Sync + Send> Sync for Hash<T> {}
68
impl<T: ?Sized + Eq + std::hash::Hash> Interner for Hash<T> {
    type T = T;

    /// Attempts to evict `value` from the backing set.
    ///
    /// Returns `(removed, handle)`: `removed` reports whether the entry is gone
    /// from the set, and `handle` carries the set's own reference out to the
    /// caller when the entry was actually evicted so it can be dropped outside
    /// the lock. NOTE(review): the precise contract is defined by the `Interner`
    /// trait in `ref_count.rs` — confirm there.
    fn remove(&self, value: &Interned<Self>) -> (bool, Option<Interned<Self>>) {
        let value = cast(value);
        let mut set = self.set.write();
        #[cfg(loom)]
        let mut set = set.unwrap();
        if let Some(i) = set.take(value) {
            if i.ref_count() == 1 {
                // only the set's own reference remains: hand it out for dropping
                (true, Some(i.0))
            } else {
                // another thread re-acquired the value concurrently; keep it interned
                set.insert(i);
                (false, None)
            }
        } else {
            // the entry was already removed, e.g. by a concurrent `remove`
            (true, None)
        }
    }
}
89
impl<T: ?Sized + Eq + std::hash::Hash> HashInterner<T> {
    /// Creates a new, empty interner.
    pub fn new() -> Self {
        Self {
            inner: Arc::new(Hash {
                set: RwLock::new(HashSet::new()),
            }),
        }
    }

    /// Returns the number of objects currently kept in this interner.
    pub fn len(&self) -> usize {
        let set = self.inner.set.read();
        #[cfg(loom)]
        let set = set.unwrap();
        set.len()
    }

    /// Returns `true` when this interner doesn’t hold any values.
    pub fn is_empty(&self) -> bool {
        let set = self.inner.set.read();
        #[cfg(loom)]
        let set = set.unwrap();
        set.is_empty()
    }

    // Common interning routine: look up `value` under a (upgradable) read lock
    // and clone the existing handle on a hit; otherwise take the write lock,
    // re-check, and insert the freshly created handle produced by `intern`.
    fn intern<U, F>(&self, value: U, intern: F) -> InternedHash<T>
    where
        F: FnOnce(U) -> InternedHash<T>,
        U: Borrow<T>,
    {
        #[cfg(not(loom))]
        let set = self.inner.set.upgradable_read();
        #[cfg(loom)]
        let set = self.inner.set.read().unwrap();
        if let Some(entry) = set.get(value.borrow()) {
            return entry.clone();
        }
        #[cfg(not(loom))]
        let mut set = RwLockUpgradableReadGuard::upgrade(set);
        #[cfg(loom)]
        let mut set = {
            // loom's RwLock has no upgradable guard, so drop and re-acquire
            drop(set);
            self.inner.set.write().unwrap()
        };
        // check again with write lock to guard against concurrent insertion
        if let Some(entry) = set.get(value.borrow()) {
            return entry.clone();
        }
        let mut ret = intern(value);
        // presumably wires the new value up to this interner for later removal —
        // TODO confirm against `Interned::make_hot` in ref_count.rs
        ret.0.make_hot(&self.inner);
        set.insert(ret.clone());
        ret
    }

    /// Intern a value from a shared reference by allocating new memory for it.
    ///
    /// ```
    /// use intern_arc::{HashInterner, InternedHash};
    ///
    /// let strings = HashInterner::<str>::new();
    /// let i: InternedHash<str> = strings.intern_ref("hello world!");
    /// ```
    pub fn intern_ref(&self, value: &T) -> InternedHash<T>
    where
        T: ToOwned,
        T::Owned: Into<Box<T>>,
    {
        self.intern(value, |v| {
            InternedHash(Interned::from_box(v.to_owned().into()))
        })
    }

    /// Intern a value from an owned reference without allocating new memory for it.
    ///
    /// ```
    /// use intern_arc::{HashInterner, InternedHash};
    ///
    /// let strings = HashInterner::<str>::new();
    /// let hello: Box<str> = "hello world!".into();
    /// let i: InternedHash<str> = strings.intern_box(hello);
    /// ```
    /// (This also works nicely with a `String` that can be turned `.into()` a `Box`.)
    pub fn intern_box(&self, value: Box<T>) -> InternedHash<T> {
        self.intern(value, |v| InternedHash(Interned::from_box(v)))
    }

    /// Intern a sized value, allocating heap memory for it.
    ///
    /// ```
    /// use intern_arc::{HashInterner, InternedHash};
    ///
    /// let arrays = HashInterner::<[u8; 1000]>::new();
    /// let i: InternedHash<[u8; 1000]> = arrays.intern_sized([0; 1000]);
    /// ```
    pub fn intern_sized(&self, value: T) -> InternedHash<T>
    where
        T: Sized,
    {
        self.intern(value, |v| InternedHash(Interned::from_sized(v)))
    }
}
190
191impl<T: ?Sized + Eq + std::hash::Hash> Default for HashInterner<T> {
192    fn default() -> Self {
193        Self::new()
194    }
195}
196
/// An interned value
///
/// This type works very similar to an [`Arc`](https://doc.rust-lang.org/std/sync/struct.Arc.html)
/// with the difference that it has no concept of weak references. They are not needed because
/// **interned values must not be modified**, so reference cycles cannot be constructed. One
/// reference is held by the interner that created this value as long as that interner lives.
///
/// Keeping interned values around does not keep the interner alive: once the last reference to
/// the interner is dropped, it will release its existing references to interned values by
/// dropping its set implementation. Only the direct allocation size of the set implementation
/// remains allocated (but no longer functional) until the last weak reference to the interner
/// goes away — each interned value keeps one such weak reference to its interner.
#[repr(transparent)] // this is important to cast references — see `cast()` below
pub struct InternedHash<T: ?Sized + Eq + std::hash::Hash>(Interned<Hash<T>>);
211
212impl<T: ?Sized + Eq + std::hash::Hash> InternedHash<T> {
213    /// Obtain current number of references, including this one.
214    ///
215    /// The value will always be at least 1. If the value is 1, this means that the interner
216    /// which produced this reference has been dropped; in this case you are still free to
217    /// use this reference in any way you like.
218    pub fn ref_count(&self) -> u32 {
219        self.0.ref_count()
220    }
221}
222
223impl<T: ?Sized + Eq + std::hash::Hash> Clone for InternedHash<T> {
224    fn clone(&self) -> Self {
225        Self(self.0.clone())
226    }
227}
228
// Reinterprets a reference to the inner representation as a reference to the
// public wrapper type, enabling `HashSet::take` with an `&InternedHash<T>` key.
fn cast<T: ?Sized + Eq + std::hash::Hash>(i: &Interned<Hash<T>>) -> &InternedHash<T> {
    // since the memory representation of InternedHash<T> is exactly the same as
    // Interned<Hash<T>>, we can turn a pointer to one into a pointer to the other
    // SAFETY: InternedHash<T> is #[repr(transparent)] over Interned<Hash<T>>, so
    // both types are guaranteed to have identical layout and the cast is sound.
    unsafe { &*(i as *const _ as *const InternedHash<T>) }
}
234
235impl<T: ?Sized + Eq + std::hash::Hash> PartialEq for InternedHash<T>
236where
237    T: PartialEq,
238{
239    fn eq(&self, other: &Self) -> bool {
240        // equal pointer means same interner and same contents
241        self.0 == other.0 || self.deref().eq(other.deref())
242    }
243}
244impl<T: ?Sized + Eq + std::hash::Hash> Eq for InternedHash<T> where T: Eq {}
245
246impl<T: ?Sized + Eq + std::hash::Hash> PartialOrd for InternedHash<T>
247where
248    T: PartialOrd,
249{
250    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
251        if self.0 == other.0 {
252            return Some(Ordering::Equal);
253        }
254        self.deref().partial_cmp(other.deref())
255    }
256}
257impl<T: ?Sized + Eq + std::hash::Hash> Ord for InternedHash<T>
258where
259    T: Ord,
260{
261    fn cmp(&self, other: &Self) -> Ordering {
262        if self.0 == other.0 {
263            return Ordering::Equal;
264        }
265        self.deref().cmp(other.deref())
266    }
267}
268
269impl<T: ?Sized + Eq + std::hash::Hash> std::hash::Hash for InternedHash<T>
270where
271    T: std::hash::Hash,
272{
273    fn hash<H: Hasher>(&self, state: &mut H) {
274        self.deref().hash(state)
275    }
276}
277
278impl<T: ?Sized + Eq + std::hash::Hash> Borrow<T> for InternedHash<T> {
279    fn borrow(&self) -> &T {
280        self.deref()
281    }
282}
283
284impl<T: ?Sized + Eq + std::hash::Hash> Deref for InternedHash<T> {
285    type Target = T;
286
287    fn deref(&self) -> &Self::Target {
288        self.0.deref()
289    }
290}
291
292impl<T: ?Sized + Eq + std::hash::Hash> AsRef<T> for InternedHash<T> {
293    fn as_ref(&self) -> &T {
294        self.deref()
295    }
296}
297
298impl<T: ?Sized + Eq + std::hash::Hash> Debug for InternedHash<T>
299where
300    T: Debug,
301{
302    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
303        write!(f, "Interned({:?})", &**self)
304    }
305}
306
307impl<T: ?Sized + Eq + std::hash::Hash> Display for InternedHash<T>
308where
309    T: Display,
310{
311    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
312        self.deref().fmt(f)
313    }
314}
315
316impl<T: ?Sized + Eq + std::hash::Hash> Pointer for InternedHash<T> {
317    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
318        Pointer::fmt(&(&**self as *const T), f)
319    }
320}
321
#[cfg(feature = "serde")]
impl<T: ?Sized + Eq + std::hash::Hash + serde::Serialize> serde::Serialize for InternedHash<T> {
    /// Serializes transparently as the underlying value.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        let value: &T = self;
        value.serialize(serializer)
    }
}
331
#[cfg(feature = "serde")]
impl<'de, T> serde::Deserialize<'de> for InternedHash<T>
where
    T: Eq + std::hash::Hash + serde::Deserialize<'de> + Send + Sync + 'static,
{
    /// Deserializes a value and interns it in the process-wide interner.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        T::deserialize(deserializer)
            .map(|value| crate::global::hash_interner().intern_sized(value))
    }
}
345
#[cfg(feature = "serde")]
impl<'de, T> serde::Deserialize<'de> for InternedHash<[T]>
where
    T: Eq + std::hash::Hash + serde::Deserialize<'de> + Send + Sync + 'static,
{
    /// Deserializes a sequence and interns the resulting boxed slice.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        Vec::<T>::deserialize(deserializer)
            .map(|values| crate::global::hash_interner().intern_box(values.into()))
    }
}
359
#[cfg(feature = "serde")]
impl<'de> serde::Deserialize<'de> for InternedHash<str> {
    /// Deserializes a string and interns it as a boxed `str`.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        String::deserialize(deserializer)
            .map(|s| crate::global::hash_interner().intern_box(s.into()))
    }
}
370
#[cfg(all(test, not(loom)))]
mod tests {
    use crate::OrdInterner;

    // NOTE(review): this test lives in hash.rs but exercises `OrdInterner` —
    // possibly a copy-paste from the ord module; confirm whether it should use
    // `HashInterner` instead.
    #[test]
    fn pointer() {
        let interner = OrdInterner::new();
        let i = interner.intern_sized(42);
        let i2 = i.clone();
        // cloned handles share one allocation, hence the same printed address
        assert_eq!(format!("{:p}", i), format!("{:p}", i2));
    }
}
383
#[test]
fn size() {
    // the struct docs promise the interner storage stays "less than hundred bytes"
    let bytes = std::mem::size_of::<Hash<()>>();
    assert!(bytes < 100, "too big: {}", bytes);
}
389
#[test]
fn debug() {
    // the Debug impl wraps the value's own representation in `Interned(..)`
    let strings = HashInterner::new();
    let i = strings.intern_ref("value");
    assert_eq!(format!("{i:?}"), r#"Interned("value")"#);
}
396
#[cfg(all(test, loom))]
mod tests {
    use super::*;
    use ::loom::{model, thread::spawn};

    // Extracts (strong, weak) counts from a `Weak` even after the strong count
    // has hit zero (the safe API refuses to report weak_count in that state).
    // NOTE(review): this peeks at the private ArcInner layout — assumed to start
    // with two usize counters — which is an implementation detail; re-verify
    // whenever the (loom) Arc implementation changes.
    fn counts<T>(weak: Weak<T>) -> (usize, usize) {
        // this is unfortunate: Arc does not allow querying weak_count once strong_count is zero
        unsafe {
            let ptr = &weak as *const _ as *const *const (usize, usize);
            **ptr
        }
    }

    // Interner and its only interned value are dropped on two racing threads.
    #[test]
    fn drop_interner() {
        model(|| {
            let i = HashInterner::new();
            let i2 = Arc::downgrade(&i.inner);

            let n = i.intern_box(42.into());

            let h = spawn(move || drop(i));
            let h2 = spawn(move || drop(n));

            h.join().unwrap();
            h2.join().unwrap();

            // no strong refs may survive; the only weak left is `i2` itself
            assert_eq!(counts(i2), (0, 1));
        })
    }

    // Two external handles to the same value race to drop after the interner is gone.
    #[test]
    fn drop_two_external() {
        model(|| {
            let i = HashInterner::new();
            let i2 = Arc::downgrade(&i.inner);

            let n = i.intern_box(42.into());
            let n2 = n.clone();
            drop(i);

            let h = spawn(move || drop(n));
            let h2 = spawn(move || drop(n2));

            h.join().unwrap();
            h2.join().unwrap();

            assert_eq!(counts(i2), (0, 1));
        })
    }

    // Dropping the last external handle races against a fresh intern of the same value.
    #[test]
    fn drop_against_intern() {
        model(|| {
            let i = HashInterner::new();
            let i2 = Arc::downgrade(&i.inner);

            let n = i.intern_box(42.into());

            let h1 = spawn(move || drop(n));
            let h2 = spawn(move || i.intern_box(42.into()));

            h1.join().unwrap();
            h2.join().unwrap();

            assert_eq!(counts(i2), (0, 1));
        })
    }

    // Three-way race: drop a handle, intern the same value, drop an interner clone.
    #[test]
    fn drop_against_intern_and_interner() {
        model(|| {
            let i = HashInterner::new();
            let i2 = Arc::downgrade(&i.inner);

            let n = i.intern_box(42.into());
            let ii = i.clone();

            let h1 = spawn(move || drop(n));
            let h2 = spawn(move || i.intern_box(42.into()));
            let h3 = spawn(move || drop(ii));

            h1.join().unwrap();
            h2.join().unwrap();
            h3.join().unwrap();

            assert_eq!(counts(i2), (0, 1));
        })
    }
}