hash_that_set/
lib.rs

1/*
2 * Copyright © 2023 Anand Beh
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#![forbid(unsafe_code)]
18
19//!
20//! This crate is dedicated to the sole purpose of hashing collections of elements
21//! in an iteration order-independent fashion. For example, it can be used to hash
22//! a `HashMap` or `HashSet`.
23//!
24//! Documentation referring to a "collection" means any type `C` where `&C: IntoIterator`
25//!
26
27use std::collections::hash_map::DefaultHasher;
28use std::collections::{HashMap, HashSet};
29use std::hash::{BuildHasher, Hash, Hasher};
30use std::marker::PhantomData;
31use std::num::Wrapping;
32use std::ops::{Deref, DerefMut};
33
34///
35/// Implements hashing by summing the hashes of each element. A new [`DefaultHasher`]
36/// is created for each element, its result added to the total calculation.
37///
38pub fn hash_by_summing_hashes<C, H>(collection: &C, state: &mut H)
39where
40    for<'c> &'c C: IntoIterator,
41    for<'c> <&'c C as IntoIterator>::Item: Hash,
42    H: Hasher,
43{
44    hash_by_summing_hashes_with::<C, H, UseDefaultHasher>(collection, state)
45}
46
47///
48/// The main function implementing hashing by summing the hashes of each element,
49/// with a means of specifying which kind of hasher is created per element via the `BH`
50/// parameter.
51///
52pub fn hash_by_summing_hashes_with<C, H, BH>(collection: &C, state: &mut H)
53where
54    for<'c> &'c C: IntoIterator,
55    for<'c> <&'c C as IntoIterator>::Item: Hash,
56    BH: BuildHasherFromFriend<C>,
57    H: Hasher,
58{
59    let mut sum = Wrapping::default();
60    for value in collection {
61        let mut hasher = BH::build_hasher_from(collection);
62        Hash::hash(&value, &mut hasher);
63        sum += hasher.finish();
64    }
65    state.write_u64(sum.0);
66}
67
68///
69/// Adds hashing to any collection according to the hash of each element, but without
70/// respecting iteration order. Instantly usable with `HashMap` or `HashSet`. `Deref` and
71/// `DerefMut` provide access to the wrapped type. To create, use the `new` method or `From`.
72///
73/// ```rust
74/// # use std::collections::HashMap;
75/// use hash_that_set::SumHashes;
76///
77/// let my_map: HashMap<i8, String> = HashMap::new();
78/// let mut my_map = SumHashes::new(my_map);
79///
80/// my_map.insert(2, String::from("hello"));
81/// ```
82///
83/// This may be used with any collection, although it requires the wrapped collection to implement
84/// [`ProvidesHasher`].
85///
86/// The layout of this struct is guaranteed to be the same as the wrapped collection. This means
87/// it is possible to transmute references; however, [`hash_by_summing_hashes`] is usually a better
88/// option than relying on `unsafe`.
89///
90#[derive(Clone, Debug, Default, Eq, PartialEq)]
91#[repr(transparent)]
92pub struct SumHashes<C: ProvidesHasher>(SumHashesAnyCollection<C, UseProvidedHasher<C>>);
93
94///
95/// Adds hashing to any collection according to the hash of each element, but without
96/// respecting iteration order. Always usable with any collection, via the default hasher.
97/// `Deref` and `DerefMut` provide access to the wrapped type.
98///
99/// **Do not use this wrapper with an ordered collection**. The wrapper does not change equality
100/// semantics; it affects hashing only.
101///
102/// The layout of this struct is guaranteed to be the same as the wrapped collection. This means
103/// it is possible to transmute references; however, [`hash_by_summing_hashes_with`] is usually a
104/// better option than relying on `unsafe`.
105///
106#[derive(Clone, Debug, Default, Eq, PartialEq)]
107#[repr(transparent)]
108pub struct SumHashesAnyCollection<C, H = UseDefaultHasher>(C, PhantomData<H>);
109
110impl<C: ProvidesHasher> From<C> for SumHashes<C> {
111    /// Creates the wrapper
112    #[inline]
113    fn from(value: C) -> Self {
114        Self(SumHashesAnyCollection::from(value))
115    }
116}
117
118impl<C: ProvidesHasher> SumHashes<C> {
119    /// Creates the wrapper
120    #[inline]
121    pub fn new(value: C) -> Self {
122        Self::from(value)
123    }
124
125    /// Destructures into the inner collection
126    #[inline]
127    pub fn into_inner(self) -> C {
128        self.0 .0
129    }
130}
131
132impl<C, H> From<C> for SumHashesAnyCollection<C, H> {
133    /// Creates the wrapper
134    #[inline]
135    fn from(value: C) -> Self {
136        Self(value, PhantomData)
137    }
138}
139
140impl<C, H> SumHashesAnyCollection<C, H> {
141    /// Creates the wrapper
142    #[inline]
143    pub fn new(value: C) -> Self {
144        Self::from(value)
145    }
146
147    /// Destructures into the inner collection
148    #[inline]
149    pub fn into_inner(self) -> C {
150        self.0
151    }
152}
153
154/// Like the standard library's [`BuildHasher`], but takes the hashing implementation from a peer
155pub trait BuildHasherFromFriend<F> {
156    /// The type of the hasher that will be created
157    type Hasher: Hasher;
158
159    /// Creates a hashing implementation
160    fn build_hasher_from<'f>(friend: &'f F) -> Self::Hasher
161    where
162        Self::Hasher: 'f;
163}
164
165/// Implementation of [`BuildHasherFromFriend`] which uses the default hasher, always
166#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
167pub struct UseDefaultHasher(());
168
169impl<F> BuildHasherFromFriend<F> for UseDefaultHasher {
170    type Hasher = DefaultHasher;
171
172    fn build_hasher_from<'f>(_: &'f F) -> Self::Hasher
173    where
174        Self::Hasher: 'f,
175    {
176        DefaultHasher::new()
177    }
178}
179
180/// Implementation of [`BuildHasherFromFriend`] which requires that the peer provides the hasher,
181/// i.e. that [`ProvidesHasher`] is implemented for the peer object
182#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
183pub struct UseProvidedHasher<C: ProvidesHasher>(PhantomData<C>);
184
185impl<C: ProvidesHasher> BuildHasherFromFriend<C> for UseProvidedHasher<C> {
186    type Hasher = <<C as ProvidesHasher>::Hasher as BuildHasher>::Hasher;
187
188    fn build_hasher_from<'f>(friend: &'f C) -> Self::Hasher
189    where
190        Self::Hasher: 'f,
191    {
192        ProvidesHasher::hasher(friend).build_hasher()
193    }
194}
195
196///
197/// Trait for types which provide a hashing implementation. This is automatically implemented
198/// for `HashMap` and `HashSet`. It allows the wrapper [`SumHashes`] to use the same
199/// hashing implementation for elements as is used for the whole hash result.
200///
201/// PRs are welcome to add features for collections from other crates which yield their hashers.
202///
203pub trait ProvidesHasher {
204    /// The type of the hashing implementation
205    type Hasher: BuildHasher;
206
207    /// Returns a reference to the used hasher
208    fn hasher(&self) -> &Self::Hasher;
209}
210
211impl<K, V, S> ProvidesHasher for HashMap<K, V, S>
212where
213    S: BuildHasher,
214{
215    type Hasher = S;
216
217    fn hasher(&self) -> &Self::Hasher {
218        HashMap::hasher(self)
219    }
220}
221
222impl<O, S> ProvidesHasher for HashSet<O, S>
223where
224    S: BuildHasher,
225{
226    type Hasher = S;
227
228    fn hasher(&self) -> &Self::Hasher {
229        HashSet::hasher(self)
230    }
231}
232
233impl<C: ProvidesHasher> Hash for SumHashes<C>
234where
235    for<'c> &'c C: IntoIterator,
236    for<'c> <&'c C as IntoIterator>::Item: Hash,
237{
238    fn hash<H: Hasher>(&self, state: &mut H) {
239        Hash::hash(&self.0, state)
240    }
241}
242
243impl<C, BH> Hash for SumHashesAnyCollection<C, BH>
244where
245    for<'c> &'c C: IntoIterator,
246    for<'c> <&'c C as IntoIterator>::Item: Hash,
247    BH: BuildHasherFromFriend<C>,
248{
249    fn hash<H: Hasher>(&self, state: &mut H) {
250        hash_by_summing_hashes_with::<C, H, BH>(&self.0, state)
251    }
252}
253
254impl<C: ProvidesHasher + IntoIterator> IntoIterator for SumHashes<C> {
255    type Item = <C as IntoIterator>::Item;
256    type IntoIter = <C as IntoIterator>::IntoIter;
257
258    fn into_iter(self) -> Self::IntoIter {
259        self.0 .0.into_iter()
260    }
261}
262
263impl<C: IntoIterator> IntoIterator for SumHashesAnyCollection<C> {
264    type Item = <C as IntoIterator>::Item;
265    type IntoIter = <C as IntoIterator>::IntoIter;
266
267    fn into_iter(self) -> Self::IntoIter {
268        self.0.into_iter()
269    }
270}
271
272impl<C: ProvidesHasher> Deref for SumHashes<C> {
273    type Target = C;
274
275    fn deref(&self) -> &Self::Target {
276        &self.0 .0
277    }
278}
279
280impl<C: ProvidesHasher> DerefMut for SumHashes<C> {
281    fn deref_mut(&mut self) -> &mut Self::Target {
282        &mut self.0 .0
283    }
284}
285
286impl<C> Deref for SumHashesAnyCollection<C> {
287    type Target = C;
288
289    fn deref(&self) -> &Self::Target {
290        &self.0
291    }
292}
293
294impl<C> DerefMut for SumHashesAnyCollection<C> {
295    fn deref_mut(&mut self) -> &mut Self::Target {
296        &mut self.0
297    }
298}
299
300#[cfg(test)]
301mod tests {
302    use super::*;
303
304    #[test]
305    fn maps_and_sets_impl_hash() {
306        static_assertions::assert_impl_all!(SumHashes<HashMap<i8, &str>>: Hash);
307        static_assertions::assert_impl_all!(SumHashes<HashSet<i8>>: Hash);
308    }
309
310    #[test]
311    fn any_collection_impl_hash() {
312        // In general, using an array with our library is a contractual violation
313        // However, this is a test, so it doesn't matter
314        static_assertions::assert_impl_all!(SumHashesAnyCollection<[&str; 5]>: Hash);
315    }
316
317    #[test]
318    fn default_if_possible() {
319        let _: SumHashes<HashMap<i8, &str>> = Default::default();
320    }
321
322    #[test]
323    fn same_elements_produce_identical_hash() {
324        // To simulate differences in iteration order, use sorting and different data structure
325        let unsorted = vec![(4, ""), (1, "hi"), (-3, "hello"), (20, "good bye")];
326        let mut sorted = Vec::from_iter(unsorted.clone());
327        sorted.sort();
328        let map: HashMap<i8, &str> = unsorted.iter().cloned().collect();
329        let sorted_map: HashMap<i8, &str> = sorted.iter().cloned().collect();
330        let set: HashSet<(i8, &str)> = unsorted.iter().cloned().collect();
331        let sorted_set: HashSet<(i8, &str)> = sorted.iter().cloned().collect();
332
333        macro_rules! gen_hash {
334            ($var:ident) => {{
335                let wrapper = SumHashesAnyCollection::<_, UseDefaultHasher>::from($var);
336                let mut hasher = DefaultHasher::new();
337                Hash::hash(&wrapper, &mut hasher);
338                hasher.finish()
339            }};
340        }
341        let all_hashes = [
342            gen_hash!(unsorted),
343            gen_hash!(sorted),
344            gen_hash!(map),
345            gen_hash!(sorted_map),
346            gen_hash!(set),
347            gen_hash!(sorted_set),
348        ];
349        let hash = all_hashes[0];
350        for other in all_hashes {
351            assert_eq!(hash, other);
352        }
353    }
354}