// cachedhash/cachedhash.rs

use std::borrow::{Borrow, BorrowMut};
use std::cmp::Ordering;
use std::collections::hash_map::DefaultHasher;
use std::hash::{BuildHasher, BuildHasherDefault, Hash, Hasher};
use std::num::NonZeroU64;
use std::ops::{Deref, DerefMut};

use crate::atomic::AtomicOptionNonZeroU64;

/// For a type `T`, [`CachedHash`] wraps `T` and implements [`Hash`] in a way that
/// caches `T`'s hash value. The first time the hash is computed, it is stored
/// and returned on subsequent calls. When the stored value is accessed mutably,
/// the hash is invalidated and has to be recomputed. [`CachedHash`] implements
/// [`Deref`] and [`DerefMut`] so it can be used as a drop-in replacement for `T`.
///
/// For the hash to be invalidated correctly, the stored type must not use
/// interior mutability in a way that affects the hash. If it does, you can use
/// [`CachedHash::invalidate_hash`] to invalidate the hash manually.
///
/// By default the internal hash is computed using [`DefaultHasher`]. You can
/// change this by providing a custom [`Hasher`] to [`CachedHash::new_with_hasher`] or
/// even a custom [`BuildHasher`] to [`CachedHash::new_with_build_hasher`]. For most use
/// cases you should not need to do this.
///
/// Note that the hash of a value of type `T` and the same value wrapped in
/// [`CachedHash`] are generally different.
///
/// # Why is this useful?
///
/// Sometimes you have a type that is expensive to hash (for example because
/// it is very large) but you need to store and move it between multiple
/// [`HashSet`](https://doc.rust-lang.org/std/collections/struct.HashSet.html)s.
/// In this case you can wrap the type in [`CachedHash`] so the hash is computed
/// only once.
///
/// However, when the type is modified often, [`CachedHash`] loses its advantage
/// as the hash gets invalidated on every modification. [`CachedHash`] also
/// needs to store the [`u64`] hash value, which takes up some space.
///
/// You can run `cargo bench` to see some simple naive benchmarks comparing
/// a plain `HashSet` with a `HashSet` that stores values wrapped in [`CachedHash`].
///
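/// # Example
///
/// A minimal usage sketch (the `use` path assumes this type is exported from the
/// crate root as `cachedhash::CachedHash`; adjust it to your actual import path):
///
/// ```
/// use std::collections::HashSet;
/// use cachedhash::CachedHash;
///
/// let mut set: HashSet<CachedHash<String>> = HashSet::new();
/// set.insert(CachedHash::new("expensive to hash".to_string()));
///
/// // `Deref` lets the wrapper be used like the inner `String`.
/// assert!(set.iter().any(|v| v.starts_with("expensive")));
/// ```
///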
/// # Details
///
/// Whenever the hash is requested and it is not already cached, it is computed
/// using the hasher provided by the `BH` [`BuildHasher`] and stored as an "internal
/// hash". Note that this is not the same as the hash returned by the [`Hash`] implementation.
/// That implementation feeds the internal hash into the hasher provided to the `hash`
/// function. This means that the resulting hash is the hash of the hash of the stored
/// value.
///
/// However, there is one more issue. If we wanted to represent both the full
/// range of hash values and the possibility of the hash not being computed yet,
/// we would need 65 bits. In order to save space we reserve one value as a
/// sentinel (this also lets us work with the stored "maybe-hash" atomically).
/// This means that we need to artificially create a hash collision. The current
/// implementation does this by changing the "internal hash" from 0 to 1 if it ends
/// up being zero. This is generally not an issue, but if you are using a custom
/// hasher it might affect you.
///
/// This behaviour is not guaranteed and may change in the future. If this
/// behaviour does not fit your use case, please open an issue.
#[derive(Debug)]
pub struct CachedHash<T: Eq + Hash, BH: BuildHasher = BuildHasherDefault<DefaultHasher>> {
    value: T,
    hash: AtomicOptionNonZeroU64,
    build_hasher: BH,
}

impl<T: Eq + Hash + PartialOrd> PartialOrd for CachedHash<T> {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        self.value.partial_cmp(&other.value)
    }
}

impl<T: Eq + Hash + Ord> Ord for CachedHash<T> {
    fn cmp(&self, other: &Self) -> Ordering {
        self.value.cmp(&other.value)
    }
}

impl<T: Eq + Hash> CachedHash<T> {
    /// Creates a new [`CachedHash`] with the given value using [`DefaultHasher`].
    ///
    /// Note that the [`BuildHasher`] stored in the structure is a zero-sized type
    /// that is both [`Send`] and [`Sync`], so it will not affect the [`Send`] and [`Sync`]
    /// properties of [`CachedHash`] nor its size.
    pub fn new(value: T) -> Self {
        Self::new_with_hasher(value)
    }
}

impl<T: Eq + Hash, H: Hasher + Default> CachedHash<T, BuildHasherDefault<H>> {
    /// Creates a new [`CachedHash`] with the given value using a provided hasher type implementing [`Default`].
    ///
    /// Note that the [`BuildHasher`] stored in the structure is a zero-sized type
    /// that is both [`Send`] and [`Sync`], so it will not affect the [`Send`] and [`Sync`]
    /// properties of [`CachedHash`] nor its size.
    pub fn new_with_hasher(value: T) -> Self {
        Self::new_with_build_hasher(value, BuildHasherDefault::default())
    }
}

impl<T: Eq + Hash, BH: BuildHasher> CachedHash<T, BH> {
    /// Creates a new [`CachedHash`] with the given value and [`BuildHasher`].
    ///
    /// Note that `build_hasher` is stored in the structure and as such it can
    /// cause the type to stop being [`Send`] and [`Sync`] if the hasher is not.
    /// It can also increase the size of the structure.
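    ///
    /// # Example
    ///
    /// A sketch using the standard library's `RandomState` as the internal
    /// [`BuildHasher`] (any other [`BuildHasher`] works the same way; the
    /// `cachedhash::CachedHash` import path is assumed):
    ///
    /// ```
    /// use std::collections::hash_map::RandomState;
    /// use cachedhash::CachedHash;
    ///
    /// let value = CachedHash::new_with_build_hasher("foo".to_string(), RandomState::new());
    /// assert_eq!(*value, "foo");
    /// ```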
    pub const fn new_with_build_hasher(value: T, build_hasher: BH) -> Self {
        Self {
            value,
            hash: AtomicOptionNonZeroU64::new_none(),
            build_hasher,
        }
    }

    /// Explicitly invalidates the cached hash. This should not be necessary
    /// in most cases as the hash will be automatically invalidated when
    /// the value is accessed mutably. However, if the value uses interior
    /// mutability in a way that affects the hash, you will need to call
    /// this function manually whenever the hash might have changed.
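    ///
    /// # Example
    ///
    /// A sketch of manual invalidation after interior mutation (the
    /// `cachedhash::CachedHash` import path is assumed; `Cell` stands in for
    /// any interior-mutability type whose contents affect the hash):
    ///
    /// ```
    /// use std::cell::Cell;
    /// use std::hash::{Hash, Hasher};
    /// use cachedhash::CachedHash;
    ///
    /// #[derive(PartialEq, Eq)]
    /// struct Counter(Cell<u32>);
    ///
    /// impl Hash for Counter {
    ///     fn hash<H: Hasher>(&self, state: &mut H) {
    ///         self.0.get().hash(state);
    ///     }
    /// }
    ///
    /// let mut counter = CachedHash::new(Counter(Cell::new(0)));
    /// counter.0.set(1); // mutates through `&self`, so the cached hash is not invalidated
    /// CachedHash::invalidate_hash(&mut counter); // invalidate it manually
    /// ```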
    #[inline]
    pub fn invalidate_hash(this: &mut Self) {
        this.hash.set(None);
    }

    /// Consumes the [`CachedHash`] and returns the stored value.
    #[inline]
    #[must_use]
    #[allow(clippy::missing_const_for_fn)] // false positive, `this` might get dropped
    pub fn take_value(this: Self) -> T {
        this.value
    }

    /// Explicitly returns an immutable reference to the stored value.
    ///
    /// Most of the time this will not be necessary as [`CachedHash`]
    /// implements [`Deref`], so autoderef rules will automatically dereference
    /// it to the stored value.
    #[inline]
    #[must_use]
    pub const fn get(this: &Self) -> &T {
        &this.value
    }

    /// Explicitly returns a mutable reference to the stored value and
    /// invalidates the cached hash.
    ///
    /// Most of the time this will not be necessary as [`CachedHash`] implements
    /// [`DerefMut`], so autoderef rules will automatically dereference it to the
    /// stored value. (Such a dereference still invalidates the cached hash.)
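    ///
    /// # Example
    ///
    /// A sketch (the `cachedhash::CachedHash` import path is assumed):
    ///
    /// ```
    /// use cachedhash::CachedHash;
    ///
    /// let mut value = CachedHash::new(vec![1, 2, 3]);
    /// CachedHash::get_mut(&mut value).push(4); // explicit form
    /// value.push(5);                           // equivalent via `DerefMut`
    /// assert_eq!(*value, vec![1, 2, 3, 4, 5]);
    /// ```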
    #[inline]
    #[must_use]
    pub fn get_mut(this: &mut Self) -> &mut T {
        Self::invalidate_hash(this);
        &mut this.value
    }
}

impl<T: Eq + Hash, BH: BuildHasher> PartialEq for CachedHash<T, BH> {
    fn eq(&self, other: &Self) -> bool {
        self.value == other.value
    }
}

impl<T: Eq + Hash, BH: BuildHasher> Eq for CachedHash<T, BH> {}

impl<T: Eq + Hash, BH: BuildHasher> Hash for CachedHash<T, BH> {
    fn hash<H2: Hasher>(&self, state: &mut H2) {
        if let Some(hash) = self.hash.get_raw() {
            state.write_u64(hash);
        } else {
            let mut hasher = self.build_hasher.build_hasher();
            self.value.hash(&mut hasher);
            // AtomicOptionNonZeroU64 can only store non-zero values so we create a small collision by bumping up hash 0 to 1.
            let hash = NonZeroU64::new(hasher.finish()).unwrap_or(NonZeroU64::new(1).unwrap());
            self.hash.set(Some(hash));
            state.write_u64(hash.into());
        }
    }
}

impl<T: Eq + Hash, BH: BuildHasher> AsMut<T> for CachedHash<T, BH> {
    fn as_mut(&mut self) -> &mut T {
        Self::get_mut(self)
    }
}

impl<T: Eq + Hash, BH: BuildHasher> AsRef<T> for CachedHash<T, BH> {
    fn as_ref(&self) -> &T {
        Self::get(self)
    }
}

impl<T: Eq + Hash, BH: BuildHasher> BorrowMut<T> for CachedHash<T, BH> {
    fn borrow_mut(&mut self) -> &mut T {
        Self::get_mut(self)
    }
}

impl<T: Eq + Hash, BH: BuildHasher> Borrow<T> for CachedHash<T, BH> {
    fn borrow(&self) -> &T {
        Self::get(self)
    }
}

impl<T: Eq + Hash, BH: BuildHasher> Deref for CachedHash<T, BH> {
    type Target = T;

    #[inline]
    fn deref(&self) -> &Self::Target {
        Self::get(self)
    }
}

impl<T: Eq + Hash, BH: BuildHasher> DerefMut for CachedHash<T, BH> {
    #[inline]
    fn deref_mut(&mut self) -> &mut Self::Target {
        Self::get_mut(self)
    }
}

impl<T: Eq + Hash, H: Hasher + Default> From<T> for CachedHash<T, BuildHasherDefault<H>> {
    fn from(value: T) -> Self {
        Self::new_with_hasher(value)
    }
}

impl<T: Eq + Hash + Clone, BH: BuildHasher + Clone> Clone for CachedHash<T, BH> {
    fn clone(&self) -> Self {
        Self {
            value: self.value.clone(),
            hash: self.hash.clone(),
            build_hasher: self.build_hasher.clone(),
        }
    }
}

#[cfg(test)]
mod tests {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};
    use std::sync::atomic::AtomicBool;

    use super::*;

    fn calculate_hash<T: Hash>(t: &T) -> u64 {
        calculate_hash_with_hasher::<T, DefaultHasher>(t)
    }

    fn calculate_hash_with_hasher<T: Hash, H: Hasher + Default>(t: &T) -> u64 {
        let mut s = H::default();
        t.hash(&mut s);
        s.finish()
    }

    #[test]
    fn hash_same_after_noop_mut_borrow() {
        let mut foo = super::CachedHash::new("foo".to_string());
        let hash = calculate_hash(&foo);
        let _ = CachedHash::get_mut(&mut foo);
        assert_eq!(hash, calculate_hash(&foo));
    }

    #[test]
    fn hash_different_after_modification() {
        let mut foo = super::CachedHash::new("foo".to_string());
        let hash = calculate_hash(&foo);
        foo.push('a');
        // The first three conditions make sure the test doesn't start failing if the
        // standard library hash implementation changes in a way that creates a collision
        // for this test case; such a change would only make this test vacuous.
        assert!(
            calculate_hash(&"foo".to_string()) == 0 || // unlikely case when 0 gets internally converted to 1
            calculate_hash(&"fooa".to_string()) == 0 || // ditto
            calculate_hash(&"foo".to_string()) == calculate_hash(&"fooa".to_string()) || // unlikely hash collision
            hash != calculate_hash(&foo)
        );
    }

    #[test]
    fn hash_same_after_invalidation() {
        let mut foo = super::CachedHash::new("foo".to_string());
        let hash = calculate_hash(&foo);
        CachedHash::invalidate_hash(&mut foo);
        assert_eq!(hash, calculate_hash(&foo));
    }

    #[test]
    fn hash_same_after_clone() {
        let foo = super::CachedHash::new("foo".to_string());
        let hash = calculate_hash(&foo);
        let foo2 = foo.clone();
        assert_eq!(hash, calculate_hash(&foo2));
    }

    #[test]
    fn hash_same_consecutive() {
        let foo = super::CachedHash::new("foo".to_string());
        let hash = calculate_hash(&foo);
        assert_eq!(hash, calculate_hash(&foo));
    }

    #[test]
    fn invalidate_invalidates() {
        let mut foo = super::CachedHash::new("foo".to_string());
        assert!(foo.hash.get().is_none());
        calculate_hash(&foo);
        assert!(foo.hash.get().is_some());
        CachedHash::invalidate_hash(&mut foo);
        assert!(foo.hash.get().is_none());
        calculate_hash(&foo);
        assert!(foo.hash.get().is_some());
    }

    #[test]
    fn mut_deref_invalidates() {
        let mut foo = super::CachedHash::new("foo".to_string());
        assert!(foo.hash.get().is_none());
        calculate_hash(&foo);
        assert!(foo.hash.get().is_some());
        foo.push('a');
        assert!(foo.hash.get().is_none());
        calculate_hash(&foo);
        assert!(foo.hash.get().is_some());
        let _ = foo.len();
        assert!(foo.hash.get().is_some());
    }

    #[test]
    fn hash_gets_cached() {
        struct YouOnlyHashOnce {
            hashed_once: AtomicBool,
        }
        impl Eq for YouOnlyHashOnce {}
        impl PartialEq for YouOnlyHashOnce {
            fn eq(&self, _other: &Self) -> bool {
                true
            }
        }
        impl Hash for YouOnlyHashOnce {
            fn hash<H: Hasher>(&self, _state: &mut H) {
                if self
                    .hashed_once
                    .swap(true, std::sync::atomic::Ordering::SeqCst)
                {
                    panic!("Hashing should only happen once");
                }
            }
        }

        let foo = super::CachedHash::new(YouOnlyHashOnce {
            hashed_once: AtomicBool::new(false),
        });
        calculate_hash(&foo);
        calculate_hash(&foo);
        calculate_hash(&foo);
    }

    #[test]
    fn take_value() {
        let foo = super::CachedHash::new("foo".to_string());
        assert_eq!(CachedHash::take_value(foo), "foo".to_string());
    }

    #[test]
    fn struct_is_small() {
        assert!(
            std::mem::size_of::<super::CachedHash<String>>()
                <= std::mem::size_of::<String>() + std::mem::size_of::<u64>()
        );
    }

    #[test]
    fn zero_hash() {
        use nohash_hasher::NoHashHasher;

        struct FixedHash<const H: u64>();
        impl<const H: u64> Eq for FixedHash<H> {}
        impl<const H: u64> PartialEq for FixedHash<H> {
            fn eq(&self, _other: &Self) -> bool {
                true
            }
        }
        impl<const H: u64> Hash for FixedHash<H> {
            fn hash<HS: Hasher>(&self, state: &mut HS) {
                state.write_u64(H);
            }
        }

        assert!(
            calculate_hash_with_hasher::<FixedHash<0>, NoHashHasher<u64>>(&FixedHash::<0>()) == 0
        );
        let foo: CachedHash<_, BuildHasherDefault<NoHashHasher<u64>>> =
            super::CachedHash::new_with_hasher(FixedHash::<0>());
        let _ = calculate_hash(&foo);
        assert!(foo.hash.get().is_some());
    }
}