hash_iter/lib.rs
1#![doc = include_str!("../README.md")]
2
3use {std::hash, xxhash_rust::xxh3::Xxh3Builder};
4
5/// Provides an iterator over multiple hash values for a given key.
6pub trait HashIterHasher<T> {
7 /// Returns an iterator over `count` number of hash values generated using
8 /// enhanced double hashing.
9 fn hash_iter<K: hash::Hash + ?Sized>(&self, key: &K, count: usize) -> impl Iterator<Item = T>;
10}
11
12/// Builds hash iterator hasher -- a hasher capable of generating multiple hash
13/// values.
14pub trait BuildHashIterHasher<T> {
15 type Hasher: HashIterHasher<T>;
16
17 fn build_hash_iter_hasher(&self) -> Self::Hasher;
18}
19
20/// Holds the state for the hasher that implements enhanced double hashing.
21///
22/// Serves as a builder, allowing to configure the hasher with custom seeds,
23/// number of required hashes, and the size of the hash table.
24#[derive(Clone, Copy)]
25pub struct DoubleHashBuilder<T> {
26 seed1: T,
27 seed2: T,
28 n: T,
29}
30
31/// Enhanced double hashing hasher.
32///
33/// Emits an iterator (for a given input key) over hash values generated using
34/// enhanced double hashing.
35#[derive(Clone, Copy)]
36pub struct DoubleHashHasher<T, H1, H2> {
37 hash_builder1: H1,
38 hash_builder2: H2,
39 n: T,
40}
41
42/// Iterator over hash values generated using enhanced double hashing technique.
43///
44/// Implements enhanced double hashing technique as described in [Bloom Filters
45/// in Probabilistic Verification paper][1] (see section 5.1 for details).
46///
47/// Mathematically, the hash function is defined as:
48/// ```math
49/// h(i) = h1(k) + i * h2(k) + (i^3-i)/6 (mod n)
50/// ```
51///
52/// [1]: https://www.khoury.northeastern.edu/~pete/pub/bloom-filters-verification.pdf
53#[derive(Debug, Clone, Copy)]
54pub struct Hashes<T> {
55 /// The first hash point.
56 hash1: T,
57
58 /// The second hash point.
59 hash2: T,
60
61 /// The size of the hash table.
62 n: T,
63
64 /// The number of hash points to generate.
65 k: T,
66
67 /// The current number of hash points generated.
68 cnt: T,
69}
70
71/// Macro to generate implementations for different numeric types.
72macro_rules! impl_hash_iter_for_type {
73 ($num_type:ty, $type_name:expr) => {
74 impl DoubleHashBuilder<$num_type> {
75 /// Constructs a new hash iterator builder, with default seeds.
76 pub fn new() -> Self {
77 // Seeds for double hashing: essentially, we can use any seeds, to
78 // initialize the hasher (by default XXH3 uses `0`).
79 // Using wrapping_add to handle truncation for smaller types
80 let seed1 = (12345_u64 as $num_type).wrapping_add(0);
81 let seed2 = (67890_u64 as $num_type).wrapping_add(0);
82 let n = <$num_type>::MAX;
83 Self { seed1, seed2, n }
84 }
85
86 pub fn with_seed1(self, seed1: $num_type) -> Self {
87 Self { seed1, ..self }
88 }
89
90 pub fn with_seed2(self, seed2: $num_type) -> Self {
91 Self { seed2, ..self }
92 }
93
94 pub fn with_n(self, n: $num_type) -> Self {
95 Self { n, ..self }
96 }
97 }
98
99 impl Default for DoubleHashBuilder<$num_type> {
100 fn default() -> Self {
101 Self::new()
102 }
103 }
104
105 impl BuildHashIterHasher<$num_type> for DoubleHashBuilder<$num_type> {
106 type Hasher = DoubleHashHasher<$num_type, Xxh3Builder, Xxh3Builder>;
107
108 fn build_hash_iter_hasher(&self) -> Self::Hasher {
109 DoubleHashHasher::<$num_type, _, _>::with_hash_builders(
110 Xxh3Builder::new().with_seed(self.seed1 as u64),
111 Xxh3Builder::new().with_seed(self.seed2 as u64),
112 self.n,
113 )
114 }
115 }
116
117 impl<H1, H2> DoubleHashHasher<$num_type, H1, H2> {
118 pub fn with_hash_builders(hash_builder1: H1, hash_builder2: H2, n: $num_type) -> Self {
119 Self {
120 hash_builder1,
121 hash_builder2,
122 n,
123 }
124 }
125 }
126
127 impl<H1, H2> HashIterHasher<$num_type> for DoubleHashHasher<$num_type, H1, H2>
128 where
129 H1: hash::BuildHasher,
130 H2: hash::BuildHasher,
131 {
132 fn hash_iter<K: hash::Hash + ?Sized>(
133 &self,
134 key: &K,
135 count: usize,
136 ) -> impl Iterator<Item = $num_type> {
137 let hash1 = self.hash_builder1.hash_one(key);
138 let hash2 = self.hash_builder2.hash_one(key);
139
140 // Convert u64 hashes to target type
141 let x = hash1 as $num_type;
142 let y = hash2 as $num_type;
143
144 // Convert count to target type
145 // Safe: u32::MAX (4.3 billion) > any practical count value
146 // u64::MAX and u128::MAX are even larger
147 let count_t = count as $num_type;
148
149 Hashes::<$num_type>::new(x, y, self.n, count_t)
150 }
151 }
152
153 impl Hashes<$num_type> {
154 /// Constructs a new hash iterator.
155 ///
156 /// The iterator is configured with the given starting hash points, for the
157 /// hashmap of size `n`, with expected number of generated hash points
158 /// equal to `k`.
159 pub fn new(hash1: $num_type, hash2: $num_type, n: $num_type, k: $num_type) -> Self {
160 Self {
161 hash1,
162 hash2,
163 n,
164 k,
165 cnt: 0,
166 }
167 }
168 }
169
170 impl Iterator for Hashes<$num_type> {
171 type Item = $num_type;
172
173 /// Returns the next hash point using enhanced double hashing algorithm.
174 /// The computation is optimized using forward differencing.
175 fn next(&mut self) -> Option<Self::Item> {
176 if self.cnt == self.k {
177 return None;
178 }
179
180 // Helper function for modular addition: computes (a + b) mod n.
181 // Assumes a and b are already reduced mod n (i.e., a < n and b < n).
182 // This avoids overflow issues that arise with naive wrapping_add + rem.
183 let add_mod = |a: $num_type, b: $num_type, n: $num_type| -> $num_type {
184 debug_assert!(a < n && b < n, "operands must be reduced mod n");
185
186 // Check if a + b >= n by testing a >= n - b
187 // This is safe because b < n, so n - b doesn't underflow
188 if a >= n - b {
189 // a + b >= n, so result is (a + b) - n
190 // Compute as a - (n - b) to avoid overflow
191 a - (n - b)
192 } else {
193 // a + b < n, just add normally
194 a + b
195 }
196 };
197
198 if self.cnt == 0 {
199 self.cnt = self.cnt + 1;
200 // Reduce initial values on first iteration
201 self.hash1 = self.hash1 % self.n;
202 self.hash2 = self.hash2 % self.n;
203 return Some(self.hash1);
204 }
205
206 // Both hash1 and hash2 are now guaranteed to be < n (reduced in previous
207 // iteration).
208 self.hash1 = add_mod(self.hash1, self.hash2, self.n);
209 self.hash2 = add_mod(self.hash2, self.cnt, self.n);
210 self.cnt = self.cnt + 1;
211
212 Some(self.hash1)
213 }
214
215 fn size_hint(&self) -> (usize, Option<usize>) {
216 let remaining = (self.k - self.cnt) as usize;
217 (remaining, Some(remaining))
218 }
219 }
220
221 impl ExactSizeIterator for Hashes<$num_type> {}
222 };
223}
224
225impl DoubleHashHasher<u64, Xxh3Builder, Xxh3Builder> {
226 /// Constructs a new double hasher using default parameters.
227 pub fn new() -> Self {
228 DoubleHashBuilder::<u64>::new().build_hash_iter_hasher()
229 }
230}
231
232impl_hash_iter_for_type!(u32, "u32");
233impl_hash_iter_for_type!(u64, "u64");
234impl_hash_iter_for_type!(u128, "u128");
235
236#[cfg(test)]
237mod tests {
238 use {super::*, std::hash::BuildHasher};
239
240 /// Mathematical representation of the enhanced double hashing function.
241 /// h(i) = h1(k) + i * h2(k) + (i^3-i)/6 (mod n)
242 ///
243 /// Straight from the paper:
244 /// x, y := a(δ) MOD m, b(δ) MOD m
245 /// f[0] := x
246 /// for i := 1 .. k-1
247 /// x := (x + y) MOD m
248 /// y := (y + i) MOD m
249 /// f[i] := x
250 fn hasn_fn(i: u64, hash1: u64, hash2: u64, n: u64) -> u64 {
251 let x = hash1.wrapping_rem(n);
252 let y = hash2.wrapping_rem(n);
253 x.wrapping_add(y.wrapping_mul(i))
254 .wrapping_add((i.pow(3) - i) / 6)
255 .wrapping_rem(n)
256 }
257
258 #[test]
259 fn default_build_hash_iter() {
260 let key = "mykey";
261 let n = 1e9 as u64;
262 let k = 100;
263
264 let builder = DoubleHashBuilder::<u64>::new()
265 .with_n(n)
266 .with_seed1(1)
267 .with_seed2(2);
268
269 let hash_builder = Xxh3Builder::new();
270 let hash1 = hash_builder.with_seed(1).hash_one(key);
271 let hash2 = hash_builder.with_seed(2).hash_one(key);
272
273 let hasher = builder.build_hash_iter_hasher();
274 for (i, hash) in hasher.hash_iter(&key, k).enumerate() {
275 assert_eq!(hash, hasn_fn(i as u64, hash1, hash2, n));
276 }
277 }
278
279 #[test]
280 fn hashes_next() {
281 let hash_builder = Xxh3Builder::new();
282 let hash1 = hash_builder.with_seed(1).hash_one("mykey");
283 let hash2 = hash_builder.with_seed(2).hash_one("mykey");
284
285 let n = 1e9 as u64;
286 let k = 100;
287 let mut iter = Hashes::<u64>::new(hash1, hash2, n, k);
288 for i in 0..k {
289 assert_eq!(iter.next(), Some(hasn_fn(i as u64, hash1, hash2, n)));
290 }
291 }
292}