hash_iter/
lib.rs

1#![doc = include_str!("../README.md")]
2
3use {std::hash, xxhash_rust::xxh3::Xxh3Builder};
4
5/// Provides an iterator over multiple hash values for a given key.
6pub trait HashIterHasher<T> {
7    /// Returns an iterator over `count` number of hash values generated using
8    /// enhanced double hashing.
9    fn hash_iter<K: hash::Hash + ?Sized>(&self, key: &K, count: usize) -> impl Iterator<Item = T>;
10}
11
12/// Builds hash iterator hasher -- a hasher capable of generating multiple hash
13/// values.
14pub trait BuildHashIterHasher<T> {
15    type Hasher: HashIterHasher<T>;
16
17    fn build_hash_iter_hasher(&self) -> Self::Hasher;
18}
19
20/// Holds the state for the hasher that implements enhanced double hashing.
21///
22/// Serves as a builder, allowing to configure the hasher with custom seeds,
23/// number of required hashes, and the size of the hash table.
24#[derive(Clone, Copy)]
25pub struct DoubleHashBuilder<T> {
26    seed1: T,
27    seed2: T,
28    n: T,
29}
30
31/// Enhanced double hashing hasher.
32///
33/// Emits an iterator (for a given input key) over hash values generated using
34/// enhanced double hashing.
35#[derive(Clone, Copy)]
36pub struct DoubleHashHasher<T, H1, H2> {
37    hash_builder1: H1,
38    hash_builder2: H2,
39    n: T,
40}
41
42/// Iterator over hash values generated using enhanced double hashing technique.
43///
44/// Implements enhanced double hashing technique as described in [Bloom Filters
45/// in Probabilistic Verification paper][1] (see section 5.1 for details).
46///
47/// Mathematically, the hash function is defined as:
48/// ```math
49/// h(i) = h1(k) + i * h2(k) + (i^3-i)/6 (mod n)
50/// ```
51///
52/// [1]: https://www.khoury.northeastern.edu/~pete/pub/bloom-filters-verification.pdf
53#[derive(Debug, Clone, Copy)]
54pub struct Hashes<T> {
55    /// The first hash point.
56    hash1: T,
57
58    /// The second hash point.
59    hash2: T,
60
61    /// The size of the hash table.
62    n: T,
63
64    /// The number of hash points to generate.
65    k: T,
66
67    /// The current number of hash points generated.
68    cnt: T,
69}
70
71/// Macro to generate implementations for different numeric types.
72macro_rules! impl_hash_iter_for_type {
73    ($num_type:ty, $type_name:expr) => {
74        impl DoubleHashBuilder<$num_type> {
75            /// Constructs a new hash iterator builder, with default seeds.
76            pub fn new() -> Self {
77                // Seeds for double hashing: essentially, we can use any seeds, to
78                // initialize the hasher (by default XXH3 uses `0`).
79                // Using wrapping_add to handle truncation for smaller types
80                let seed1 = (12345_u64 as $num_type).wrapping_add(0);
81                let seed2 = (67890_u64 as $num_type).wrapping_add(0);
82                let n = <$num_type>::MAX;
83                Self { seed1, seed2, n }
84            }
85
86            pub fn with_seed1(self, seed1: $num_type) -> Self {
87                Self { seed1, ..self }
88            }
89
90            pub fn with_seed2(self, seed2: $num_type) -> Self {
91                Self { seed2, ..self }
92            }
93
94            pub fn with_n(self, n: $num_type) -> Self {
95                Self { n, ..self }
96            }
97        }
98
99        impl Default for DoubleHashBuilder<$num_type> {
100            fn default() -> Self {
101                Self::new()
102            }
103        }
104
105        impl BuildHashIterHasher<$num_type> for DoubleHashBuilder<$num_type> {
106            type Hasher = DoubleHashHasher<$num_type, Xxh3Builder, Xxh3Builder>;
107
108            fn build_hash_iter_hasher(&self) -> Self::Hasher {
109                DoubleHashHasher::<$num_type, _, _>::with_hash_builders(
110                    Xxh3Builder::new().with_seed(self.seed1 as u64),
111                    Xxh3Builder::new().with_seed(self.seed2 as u64),
112                    self.n,
113                )
114            }
115        }
116
117        impl<H1, H2> DoubleHashHasher<$num_type, H1, H2> {
118            pub fn with_hash_builders(hash_builder1: H1, hash_builder2: H2, n: $num_type) -> Self {
119                Self {
120                    hash_builder1,
121                    hash_builder2,
122                    n,
123                }
124            }
125        }
126
127        impl<H1, H2> HashIterHasher<$num_type> for DoubleHashHasher<$num_type, H1, H2>
128        where
129            H1: hash::BuildHasher,
130            H2: hash::BuildHasher,
131        {
132            fn hash_iter<K: hash::Hash + ?Sized>(
133                &self,
134                key: &K,
135                count: usize,
136            ) -> impl Iterator<Item = $num_type> {
137                let hash1 = self.hash_builder1.hash_one(key);
138                let hash2 = self.hash_builder2.hash_one(key);
139
140                // Convert u64 hashes to target type
141                let x = hash1 as $num_type;
142                let y = hash2 as $num_type;
143
144                // Convert count to target type
145                // Safe: u32::MAX (4.3 billion) > any practical count value
146                // u64::MAX and u128::MAX are even larger
147                let count_t = count as $num_type;
148
149                Hashes::<$num_type>::new(x, y, self.n, count_t)
150            }
151        }
152
153        impl Hashes<$num_type> {
154            /// Constructs a new hash iterator.
155            ///
156            /// The iterator is configured with the given starting hash points, for the
157            /// hashmap of size `n`, with expected number of generated hash points
158            /// equal to `k`.
159            pub fn new(hash1: $num_type, hash2: $num_type, n: $num_type, k: $num_type) -> Self {
160                Self {
161                    hash1,
162                    hash2,
163                    n,
164                    k,
165                    cnt: 0,
166                }
167            }
168        }
169
170        impl Iterator for Hashes<$num_type> {
171            type Item = $num_type;
172
173            /// Returns the next hash point using enhanced double hashing algorithm.
174            /// The computation is optimized using forward differencing.
175            fn next(&mut self) -> Option<Self::Item> {
176                if self.cnt == self.k {
177                    return None;
178                }
179
180                // Helper function for modular addition: computes (a + b) mod n.
181                // Assumes a and b are already reduced mod n (i.e., a < n and b < n).
182                // This avoids overflow issues that arise with naive wrapping_add + rem.
183                let add_mod = |a: $num_type, b: $num_type, n: $num_type| -> $num_type {
184                    debug_assert!(a < n && b < n, "operands must be reduced mod n");
185
186                    // Check if a + b >= n by testing a >= n - b
187                    // This is safe because b < n, so n - b doesn't underflow
188                    if a >= n - b {
189                        // a + b >= n, so result is (a + b) - n
190                        // Compute as a - (n - b) to avoid overflow
191                        a - (n - b)
192                    } else {
193                        // a + b < n, just add normally
194                        a + b
195                    }
196                };
197
198                if self.cnt == 0 {
199                    self.cnt = self.cnt + 1;
200                    // Reduce initial values on first iteration
201                    self.hash1 = self.hash1 % self.n;
202                    self.hash2 = self.hash2 % self.n;
203                    return Some(self.hash1);
204                }
205
206                // Both hash1 and hash2 are now guaranteed to be < n (reduced in previous
207                // iteration).
208                self.hash1 = add_mod(self.hash1, self.hash2, self.n);
209                self.hash2 = add_mod(self.hash2, self.cnt, self.n);
210                self.cnt = self.cnt + 1;
211
212                Some(self.hash1)
213            }
214
215            fn size_hint(&self) -> (usize, Option<usize>) {
216                let remaining = (self.k - self.cnt) as usize;
217                (remaining, Some(remaining))
218            }
219        }
220
221        impl ExactSizeIterator for Hashes<$num_type> {}
222    };
223}
224
225impl DoubleHashHasher<u64, Xxh3Builder, Xxh3Builder> {
226    /// Constructs a new double hasher using default parameters.
227    pub fn new() -> Self {
228        DoubleHashBuilder::<u64>::new().build_hash_iter_hasher()
229    }
230}
231
232impl_hash_iter_for_type!(u32, "u32");
233impl_hash_iter_for_type!(u64, "u64");
234impl_hash_iter_for_type!(u128, "u128");
235
236#[cfg(test)]
237mod tests {
238    use {super::*, std::hash::BuildHasher};
239
240    /// Mathematical representation of the enhanced double hashing function.
241    /// h(i) = h1(k) + i * h2(k) + (i^3-i)/6 (mod n)
242    ///
243    /// Straight from the paper:
244    /// x, y := a(δ) MOD m, b(δ) MOD m
245    /// f[0] := x
246    /// for i := 1 .. k-1
247    ///   x := (x + y) MOD m
248    ///   y := (y + i) MOD m
249    ///   f[i] := x
250    fn hasn_fn(i: u64, hash1: u64, hash2: u64, n: u64) -> u64 {
251        let x = hash1.wrapping_rem(n);
252        let y = hash2.wrapping_rem(n);
253        x.wrapping_add(y.wrapping_mul(i))
254            .wrapping_add((i.pow(3) - i) / 6)
255            .wrapping_rem(n)
256    }
257
258    #[test]
259    fn default_build_hash_iter() {
260        let key = "mykey";
261        let n = 1e9 as u64;
262        let k = 100;
263
264        let builder = DoubleHashBuilder::<u64>::new()
265            .with_n(n)
266            .with_seed1(1)
267            .with_seed2(2);
268
269        let hash_builder = Xxh3Builder::new();
270        let hash1 = hash_builder.with_seed(1).hash_one(key);
271        let hash2 = hash_builder.with_seed(2).hash_one(key);
272
273        let hasher = builder.build_hash_iter_hasher();
274        for (i, hash) in hasher.hash_iter(&key, k).enumerate() {
275            assert_eq!(hash, hasn_fn(i as u64, hash1, hash2, n));
276        }
277    }
278
279    #[test]
280    fn hashes_next() {
281        let hash_builder = Xxh3Builder::new();
282        let hash1 = hash_builder.with_seed(1).hash_one("mykey");
283        let hash2 = hash_builder.with_seed(2).hash_one("mykey");
284
285        let n = 1e9 as u64;
286        let k = 100;
287        let mut iter = Hashes::<u64>::new(hash1, hash2, n, k);
288        for i in 0..k {
289            assert_eq!(iter.next(), Some(hasn_fn(i as u64, hash1, hash2, n)));
290        }
291    }
292}