1#[allow(unused_imports)]
2use std::collections::hash_map::{DefaultHasher, RandomState};
3use std::hash::{BuildHasher, Hash, Hasher};
4
5use crate::{BuildHasherDefault, Error, Result};
6
7const XOR_MAX_ITERATIONS: usize = 100;
9
10#[inline]
11pub(crate) fn binary_fuse_murmur64(mut h: u64) -> u64 {
12 h ^= h >> 33;
13 h = h.wrapping_mul(0xff51afd7ed558ccd_u64);
14 h ^= h >> 33;
15 h = h.wrapping_mul(0xc4ceb9fe1a85ec53_u64);
16 h ^= h >> 33;
17 h
18}
19
20#[inline]
21pub(crate) fn binary_fuse_mix_split(key: u64, seed: u64) -> u64 {
22 binary_fuse_murmur64(key.wrapping_add(seed))
23}
24
25#[allow(dead_code)]
26#[inline]
27fn binary_fuse_rotl64(n: u64, c: u32) -> u64 {
28 n.rotate_left(c)
29}
30
31#[allow(dead_code)]
32#[inline]
33fn binary_fuse_reduce(hash: u32, n: u32) -> u32 {
34 (((hash as u64) * (n as u64)) >> 32) as u32
36}
37
38#[inline]
39fn binary_fuse8_fingerprint(hash: u64) -> u64 {
40 hash ^ (hash >> 32)
41}
42
43pub(crate) fn binary_fuse_rng_splitmix64(seed: &mut u64) -> u64 {
45 *seed = seed.wrapping_add(0x9E3779B97F4A7C15_u64);
46 let mut z = *seed;
47 z = (z ^ (z >> 30)).wrapping_mul(0xBF58476D1CE4E5B9_u64);
48 z = (z ^ (z >> 27)).wrapping_mul(0x94D049BB133111EB_u64);
49 z ^ (z >> 31)
50}
51
52#[inline]
53pub(crate) fn binary_fuse_mulhi(a: u64, b: u64) -> u64 {
54 (((a as u128) * (b as u128)) >> 64) as u64
55}
56
57#[inline]
58pub(crate) fn binary_fuse_calculate_segment_length(arity: u32, size: u32) -> u32 {
59 let ln_size = (size as f64).ln();
60
61 match arity {
64 3 => 1_u32 << ((ln_size / 3.33_f64.ln() + 2.25).floor() as u32),
65 4 => 1_u32 << ((ln_size / 2.91_f64.ln() - 0.50).floor() as u32),
66 _ => 65536,
67 }
68}
69
70#[inline]
71fn binary_fuse8_max(a: f64, b: f64) -> f64 {
72 if a < b {
73 b
74 } else {
75 a
76 }
77}
78
79#[inline]
80pub(crate) fn binary_fuse_calculate_size_factor(arity: u32, size: u32) -> f64 {
81 let ln_size = (size as f64).ln();
82 match arity {
83 3 => binary_fuse8_max(1.125, 0.875 + 0.250 * 1000000.0_f64.ln() / ln_size),
84 4 => binary_fuse8_max(1.075, 0.770 + 0.305 * 0600000.0_f64.ln() / ln_size),
85 _ => 2.0,
86 }
87}
88
89#[inline]
90pub(crate) fn binary_fuse_mod3(x: u8) -> u8 {
91 if x > 2 {
92 x - 3
93 } else {
94 x
95 }
96}
97
98pub struct Fuse8<H = BuildHasherDefault>
122where
123 H: BuildHasher,
124{
125 keys: Option<Vec<u64>>,
126 pub hash_builder: H,
127 pub seed: u64,
128 pub segment_length: u32,
129 pub segment_length_mask: u32,
130 pub segment_count: u32,
131 pub segment_count_length: u32,
132 pub finger_prints: Vec<u8>,
133}
134
135#[derive(Default)]
136pub(crate) struct BinaryHashes {
137 pub(crate) h0: u32,
138 pub(crate) h1: u32,
139 pub(crate) h2: u32,
140}
141
142impl<H> Fuse8<H>
143where
144 H: BuildHasher,
145{
146 #[inline]
147 fn binary_fuse8_hash_batch(&self, hash: u64) -> BinaryHashes {
148 let mut ans = BinaryHashes::default();
149
150 ans.h0 = binary_fuse_mulhi(hash, self.segment_count_length.into()) as u32;
151 ans.h1 = ans.h0 + self.segment_length;
152 ans.h2 = ans.h1 + self.segment_length;
153 ans.h1 ^= ((hash >> 18) as u32) & self.segment_length_mask;
154 ans.h2 ^= (hash as u32) & self.segment_length_mask;
155 ans
156 }
157
158 #[inline]
159 fn binary_fuse8_hash(&self, index: u32, hash: u64) -> u32 {
160 let mut h = binary_fuse_mulhi(hash, self.segment_count_length.into());
161 h += (index * self.segment_length) as u64;
162 let hh = hash & ((1_u64 << 36) - 1);
164 h ^= (hh >> (36 - 18 * index)) & (self.segment_length_mask as u64);
166
167 h as u32
168 }
169}
170
171impl<H> Fuse8<H>
172where
173 H: BuildHasher,
174{
175 pub fn new(size: u32) -> Fuse8<H>
178 where
179 H: Default,
180 {
181 Self::with_hasher(size, H::default())
182 }
183
184 pub fn with_hasher(size: u32, hash_builder: H) -> Fuse8<H> {
186 use std::cmp;
187
188 let arity = 3_u32;
189
190 let segment_length = match size {
191 0 => 4,
192 size => cmp::min(binary_fuse_calculate_segment_length(arity, size), 262144),
193 };
194
195 let segment_length_mask = segment_length - 1;
196 let mut array_length = {
197 let size_factor = binary_fuse_calculate_size_factor(arity, size);
198 let cap = match size {
199 0 | 1 => 0,
200 size => ((size as f64) * size_factor).round() as u32,
201 };
202 let n = ((cap + segment_length - 1) / segment_length).wrapping_sub(arity - 1);
203 (n.wrapping_add(arity) - 1) * segment_length
204 };
205
206 let mut segment_count = (array_length + segment_length - 1) / segment_length;
207 segment_count = if segment_count <= (arity - 1) {
208 1
209 } else {
210 segment_count - (arity - 1)
211 };
212
213 array_length = (segment_count + arity - 1) * segment_length;
214 let segment_count_length = segment_count * segment_length;
215
216 Fuse8 {
217 keys: Some(Vec::default()),
218 hash_builder,
219 seed: u64::default(),
220 segment_length,
221 segment_length_mask,
222 segment_count,
223 segment_count_length,
224 finger_prints: vec![0; array_length as usize],
225 }
226 }
227}
228
229impl<H> Fuse8<H>
230where
231 H: BuildHasher,
232{
233 #[inline]
235 pub fn size_of(&self) -> usize {
236 std::mem::size_of::<Self>() + self.finger_prints.len()
237 }
238
239 pub fn insert<K: ?Sized + Hash>(&mut self, key: &K) {
242 let digest = {
243 let mut hasher = self.hash_builder.build_hasher();
244 key.hash(&mut hasher);
245 hasher.finish()
246 };
247 self.keys.as_mut().unwrap().push(digest);
248 }
249
250 pub fn populate<K: Hash>(&mut self, keys: &[K]) {
254 keys.iter().for_each(|key| {
255 let mut hasher = self.hash_builder.build_hasher();
256 key.hash(&mut hasher);
257 self.keys.as_mut().unwrap().push(hasher.finish());
258 })
259 }
260
261 pub fn populate_keys(&mut self, digests: &[u64]) {
263 self.keys.as_mut().unwrap().extend_from_slice(&digests);
264 }
265
266 pub fn build(&mut self) -> Result<()> {
277 match self.keys.take() {
278 Some(keys) => self.build_keys(&keys),
279 None => Ok(()),
280 }
281 }
282
283 pub fn build_keys(&mut self, digests: &[u64]) -> Result<()> {
290 let mut rng_counter = 0x726b2b9d438b9d4d_u64;
291 let capacity = self.finger_prints.len();
292 let size = digests.len();
293
294 self.seed = binary_fuse_rng_splitmix64(&mut rng_counter);
295
296 let mut reverse_order: Vec<u64> = vec![0; size + 1];
297 let mut reverse_h: Vec<u8> = vec![0; size];
298 let mut alone: Vec<u32> = vec![0; capacity as usize];
299 let mut t2count: Vec<u8> = vec![0; capacity as usize];
300 let mut t2hash: Vec<u64> = vec![0; capacity as usize];
301
302 let mut block_bits: u32 = 1;
303 while (1_u32 << block_bits) < self.segment_count {
304 block_bits += 1;
305 }
306
307 let block = 1_u32 << block_bits;
308
309 let mut start_pos: Vec<u32> = vec![0; 1 << block_bits];
310
311 let mut h012 = [0_u32; 5];
312
313 reverse_order[size] = 1; let mut iter = 0..=XOR_MAX_ITERATIONS;
315 loop {
316 if iter.next().is_none() {
317 err_at!(Fatal, msg: "Too many iterations. Are all your keys unique?")?;
318 }
319
320 for i in 0_u32..block {
321 start_pos[i as usize] =
324 (((i as u64) * (size as u64)) >> block_bits) as u32;
325 }
326
327 let mask_block = (block - 1) as u64;
328 for (_, digest) in digests.iter().enumerate().take(size) {
329 let hash: u64 = binary_fuse_murmur64(digest.wrapping_add(self.seed));
330 let mut segment_index: u64 = hash >> (64 - block_bits);
331 while reverse_order[start_pos[segment_index as usize] as usize] != 0 {
332 segment_index += 1;
333 segment_index &= mask_block;
334 }
335 reverse_order[start_pos[segment_index as usize] as usize] = hash;
336 start_pos[segment_index as usize] += 1;
337 }
338
339 let mut error: isize = 0;
340 let mut duplicates = 0;
341 for (_, rev_order) in reverse_order.iter().enumerate().take(size) {
342 let hash: u64 = *rev_order;
343
344 let h0: usize = self.binary_fuse8_hash(0, hash) as usize;
345 t2count[h0] = t2count[h0].wrapping_add(4);
346 t2hash[h0] ^= hash;
347
348 let h1: usize = self.binary_fuse8_hash(1, hash) as usize;
349 t2count[h1] = t2count[h1].wrapping_add(4);
350 t2count[h1] ^= 1;
351 t2hash[h1] ^= hash;
352
353 let h2: usize = self.binary_fuse8_hash(2, hash) as usize;
354 t2count[h2] = t2count[h2].wrapping_add(4);
355 t2hash[h2] ^= hash;
356 t2count[h2] ^= 2;
357
358 if (t2hash[h0] & t2hash[h1] & t2hash[h2]) == 0 {
361 if ((t2hash[h0] == 0) && (t2count[h0] == 8))
363 || ((t2hash[h1] == 0) && (t2count[h1] == 8))
364 || ((t2hash[h2] == 0) && (t2count[h2] == 8))
365 {
366 duplicates += 1;
367 t2count[h0] = t2count[h0].wrapping_sub(4);
368 t2hash[h0] ^= hash;
369 t2count[h1] = t2count[h1].wrapping_sub(4);
370 t2count[h1] ^= 1;
371 t2hash[h1] ^= hash;
372 t2count[h2] = t2count[h2].wrapping_sub(4);
373 t2hash[h2] ^= hash;
374 t2count[h2] ^= 2;
375 }
376 }
377
378 error = if t2count[h0] < 4 { 1 } else { error };
379 error = if t2count[h1] < 4 { 1 } else { error };
380 error = if t2count[h2] < 4 { 1 } else { error };
381 }
382
383 if error > 0 {
384 continue;
385 }
386
387 let mut q_size = 0_usize; for (i, x) in t2count.iter().enumerate().take(capacity) {
391 alone[q_size] = i as u32;
392 q_size += if (x >> 2) == 1 { 1 } else { 0 };
393 }
394
395 let mut stack_size = 0_usize;
396
397 while q_size > 0 {
398 q_size -= 1;
399 let index = alone[q_size] as usize;
400 if (t2count[index] >> 2) == 1 {
401 let hash: u64 = t2hash[index];
402
403 h012[1] = self.binary_fuse8_hash(1, hash);
405 h012[2] = self.binary_fuse8_hash(2, hash);
406 h012[3] = self.binary_fuse8_hash(0, hash); h012[4] = h012[1];
408
409 let found: u8 = t2count[index] & 3;
410 reverse_h[stack_size] = found;
411 reverse_order[stack_size] = hash;
412 stack_size += 1;
413
414 let other_index1: u32 = h012[(found + 1) as usize];
415 alone[q_size] = other_index1;
416 q_size += if (t2count[other_index1 as usize] >> 2) == 2 {
417 1
418 } else {
419 0
420 };
421
422 t2count[other_index1 as usize] -= 4;
423 t2count[other_index1 as usize] ^= binary_fuse_mod3(found + 1);
424 t2hash[other_index1 as usize] ^= hash;
425
426 let other_index2: u32 = h012[(found + 2) as usize];
427 alone[q_size] = other_index2;
428 q_size += if (t2count[other_index2 as usize] >> 2) == 2 {
429 1
430 } else {
431 0
432 };
433 t2count[other_index2 as usize] -= 4;
434 t2count[other_index2 as usize] ^= binary_fuse_mod3(found + 2);
435 t2hash[other_index2 as usize] ^= hash;
436 }
437 }
438
439 if (stack_size + duplicates) == size {
440 break; }
442
443 reverse_order.fill(0);
444 reverse_order[size] = 1; t2count.fill(0);
446 t2hash.fill(0);
447
448 self.seed = binary_fuse_rng_splitmix64(&mut rng_counter);
449 }
450
451 if size == 0 {
452 return Ok(());
453 }
454
455 for i in (0_usize..size).rev() {
456 let hash: u64 = reverse_order[i];
458 let xor2: u8 = binary_fuse8_fingerprint(hash) as u8;
459 let found: usize = reverse_h[i] as usize;
460 h012[0] = self.binary_fuse8_hash(0, hash);
461 h012[1] = self.binary_fuse8_hash(1, hash);
462 h012[2] = self.binary_fuse8_hash(2, hash);
463 h012[3] = h012[0];
464 h012[4] = h012[1];
465 self.finger_prints[h012[found] as usize] = xor2
466 ^ self.finger_prints[h012[found + 1] as usize]
467 ^ self.finger_prints[h012[found + 2] as usize];
468 }
469
470 Ok(())
471 }
472}
473
474impl<H> Fuse8<H>
475where
476 H: BuildHasher,
477{
478 pub fn contains<K: ?Sized + Hash>(&self, key: &K) -> bool {
481 let digest = {
482 let mut hasher = self.hash_builder.build_hasher();
483 key.hash(&mut hasher);
484 hasher.finish()
485 };
486 self.contains_key(digest)
487 }
488
489 pub fn contains_key(&self, digest: u64) -> bool {
492 let hash = binary_fuse_mix_split(digest, self.seed);
493 let mut f = binary_fuse8_fingerprint(hash) as u8;
494 let BinaryHashes { h0, h1, h2 } = self.binary_fuse8_hash_batch(hash);
495 f ^= self.finger_prints[h0 as usize]
496 ^ self.finger_prints[h1 as usize]
497 ^ self.finger_prints[h2 as usize];
498 f == 0
499 }
500
501 #[allow(dead_code)]
502 fn get_hasher(&self) -> H::Hasher {
503 self.hash_builder.build_hasher()
504 }
505}
506
507#[cfg(test)]
508#[path = "fuse8_test.rs"]
509mod fuse8_test;