1#[allow(unused_imports)]
2use std::collections::hash_map::{DefaultHasher, RandomState};
3use std::{
4 collections::BTreeMap,
5 hash::{BuildHasher, Hash, Hasher},
6};
7
8use crate::{
9 fuse8::{
10 binary_fuse_calculate_segment_length, binary_fuse_calculate_size_factor,
11 binary_fuse_mix_split, binary_fuse_mod3, binary_fuse_mulhi, binary_fuse_murmur64,
12 binary_fuse_rng_splitmix64, BinaryHashes,
13 },
14 BuildHasherDefault, Error, Result,
15};
16
17const XOR_MAX_ITERATIONS: usize = 100;
19
20#[inline]
21pub fn binary_fuse16_fingerprint(hash: u64) -> u64 {
22 hash ^ (hash >> 32)
23}
24
25pub struct Fuse16<H = BuildHasherDefault>
43where
44 H: BuildHasher,
45{
46 keys: Option<BTreeMap<u64, ()>>,
47 pub hash_builder: H,
48 pub seed: u64,
49 pub segment_length: u32,
50 pub segment_length_mask: u32,
51 pub segment_count: u32,
52 pub segment_count_length: u32,
53 pub finger_prints: Vec<u16>,
54}
55
56impl<H> Fuse16<H>
57where
58 H: BuildHasher,
59{
60 #[inline]
61 fn binary_fuse16_hash_batch(&self, hash: u64) -> BinaryHashes {
62 let mut ans = BinaryHashes::default();
63
64 ans.h0 = binary_fuse_mulhi(hash, self.segment_count_length.into()) as u32;
65 ans.h1 = ans.h0 + self.segment_length;
66 ans.h2 = ans.h1 + self.segment_length;
67 ans.h1 ^= ((hash >> 18) as u32) & self.segment_length_mask;
68 ans.h2 ^= (hash as u32) & self.segment_length_mask;
69 ans
70 }
71
72 #[inline]
73 fn binary_fuse16_hash(&self, index: u32, hash: u64) -> u32 {
74 let mut h = binary_fuse_mulhi(hash, self.segment_count_length.into());
75 h += (index * self.segment_length) as u64;
76 let hh = hash & ((1_u64 << 36) - 1);
78 h ^= (hh >> (36 - 18 * index)) & (self.segment_length_mask as u64);
80
81 h as u32
82 }
83}
84
85impl<H> Fuse16<H>
86where
87 H: BuildHasher,
88{
89 pub fn new(size: u32) -> Fuse16<H>
92 where
93 H: Default,
94 {
95 Self::with_hasher(size, H::default())
96 }
97
98 pub fn with_hasher(size: u32, hash_builder: H) -> Fuse16<H> {
100 use std::cmp;
101
102 let arity = 3_u32;
103
104 let segment_length = match size {
105 0 => 4,
106 size => cmp::min(binary_fuse_calculate_segment_length(arity, size), 262144),
107 };
108
109 let segment_length_mask = segment_length - 1;
110 let mut array_length = {
111 let size_factor = binary_fuse_calculate_size_factor(arity, size);
112 let cap = match size {
113 0 | 1 => 0,
114 size => ((size as f64) * size_factor).round() as u32,
115 };
116 let n = ((cap + segment_length - 1) / segment_length).wrapping_sub(arity - 1);
117 (n.wrapping_add(arity) - 1) * segment_length
118 };
119
120 let mut segment_count = (array_length + segment_length - 1) / segment_length;
121 segment_count = if segment_count <= (arity - 1) {
122 1
123 } else {
124 segment_count - (arity - 1)
125 };
126
127 array_length = (segment_count + arity - 1) * segment_length;
128 let segment_count_length = segment_count * segment_length;
129
130 Fuse16 {
131 keys: Some(BTreeMap::new()),
132 hash_builder,
133 seed: u64::default(),
134 segment_length,
135 segment_length_mask,
136 segment_count,
137 segment_count_length,
138 finger_prints: vec![0; array_length as usize],
139 }
140 }
141}
142
143impl<H> Fuse16<H>
144where
145 H: BuildHasher,
146{
147 #[inline]
149 pub fn size_of(&self) -> usize {
150 std::mem::size_of::<Self>() + (self.finger_prints.len() * 2)
151 }
152
153 pub fn insert<K: ?Sized + Hash>(&mut self, key: &K) {
156 let digest = {
157 let mut hasher = self.hash_builder.build_hasher();
158 key.hash(&mut hasher);
159 hasher.finish()
160 };
161 self.keys.as_mut().unwrap().insert(digest, ());
162 }
163
164 pub fn populate<K: Hash>(&mut self, keys: &[K]) {
168 keys.iter().for_each(|key| {
169 let mut hasher = self.hash_builder.build_hasher();
170 key.hash(&mut hasher);
171 self.keys.as_mut().unwrap().insert(hasher.finish(), ());
172 })
173 }
174
175 pub fn populate_keys(&mut self, digests: &[u64]) {
177 for digest in digests.iter() {
178 self.keys.as_mut().unwrap().insert(*digest, ());
179 }
180 }
181 pub fn build(&mut self) -> Result<()> {
192 match self.keys.take() {
193 Some(keys) => {
194 let digests = keys.iter().map(|(k, _)| *k).collect::<Vec<u64>>();
195 self.build_keys(&digests)
196 }
197 None => Ok(()),
198 }
199 }
200
201 pub fn build_keys(&mut self, digests: &[u64]) -> Result<()> {
208 let mut rng_counter = 0x726b2b9d438b9d4d_u64;
209 let capacity = self.finger_prints.len();
210 let size = digests.len();
211
212 self.seed = binary_fuse_rng_splitmix64(&mut rng_counter);
213 let mut reverse_order: Vec<u64> = vec![0; size + 1];
214 let mut reverse_h: Vec<u8> = vec![0; size];
215 let mut alone: Vec<u32> = vec![0; capacity];
216 let mut t2count: Vec<u8> = vec![0; capacity];
217 let mut t2hash: Vec<u64> = vec![0; capacity];
218
219 let mut block_bits: u32 = 1;
220 while (1_u32 << block_bits) < self.segment_count {
221 block_bits += 1;
222 }
223 let block = 1_u32 << block_bits;
224
225 let mut start_pos: Vec<u32> = vec![0; 1 << block_bits];
226
227 let mut h012 = [0_u32; 5];
228
229 reverse_order[size] = 1; let mut iter = 0..=XOR_MAX_ITERATIONS;
231 loop {
232 if iter.next().is_none() {
233 err_at!(Fatal, msg: "Too many iterations. Are all your keys unique?")?;
234 }
235
236 for i in 0_u32..block {
237 start_pos[i as usize] =
240 (((i as u64) * (size as u64)) >> block_bits) as u32;
241 }
242
243 let mask_block = (block - 1) as u64;
244 for (_, digest) in digests.iter().enumerate().take(size) {
245 let hash: u64 = binary_fuse_murmur64(digest.wrapping_add(self.seed));
246 let mut segment_index: u64 = hash >> (64 - block_bits);
247 while reverse_order[start_pos[segment_index as usize] as usize] != 0 {
248 segment_index += 1;
249 segment_index &= mask_block;
250 }
251 reverse_order[start_pos[segment_index as usize] as usize] = hash;
252 start_pos[segment_index as usize] += 1;
253 }
254
255 let mut error: isize = 0;
256 for (_, rev_order) in reverse_order.iter().enumerate().take(size) {
257 let hash: u64 = *rev_order;
258
259 let h0: usize = self.binary_fuse16_hash(0, hash) as usize;
260 t2count[h0] = t2count[h0].wrapping_add(4);
261 t2hash[h0] ^= hash;
262
263 let h1: usize = self.binary_fuse16_hash(1, hash) as usize;
264 t2count[h1] = t2count[h1].wrapping_add(4);
265 t2count[h1] ^= 1;
266 t2hash[h1] ^= hash;
267
268 let h2: usize = self.binary_fuse16_hash(2, hash) as usize;
269 t2count[h2] = t2count[h2].wrapping_add(4);
270 t2hash[h2] ^= hash;
271 t2count[h2] ^= 2;
272
273 error = if t2count[h0] < 4 { 1 } else { error };
274 error = if t2count[h1] < 4 { 1 } else { error };
275 error = if t2count[h2] < 4 { 1 } else { error };
276 }
277
278 if error > 0 {
279 continue;
280 }
281
282 let mut q_size = 0_usize; for (i, x) in t2count.iter().enumerate().take(capacity) {
286 alone[q_size] = i as u32;
287 q_size += if (x >> 2) == 1 { 1 } else { 0 };
288 }
289
290 let mut stack_size = 0_usize;
291
292 while q_size > 0 {
293 q_size -= 1;
294 let index = alone[q_size] as usize;
295 if (t2count[index] >> 2) == 1 {
296 let hash: u64 = t2hash[index];
297
298 h012[1] = self.binary_fuse16_hash(1, hash);
300 h012[2] = self.binary_fuse16_hash(2, hash);
301 h012[3] = self.binary_fuse16_hash(0, hash); h012[4] = h012[1];
303
304 let found: u8 = t2count[index] & 3;
305 reverse_h[stack_size] = found;
306 reverse_order[stack_size] = hash;
307 stack_size += 1;
308
309 let other_index1: u32 = h012[(found + 1) as usize];
310 alone[q_size] = other_index1;
311 q_size += if (t2count[other_index1 as usize] >> 2) == 2 {
312 1
313 } else {
314 0
315 };
316
317 t2count[other_index1 as usize] -= 4;
318 t2count[other_index1 as usize] ^= binary_fuse_mod3(found + 1);
319 t2hash[other_index1 as usize] ^= hash;
320
321 let other_index2: u32 = h012[(found + 2) as usize];
322 alone[q_size] = other_index2;
323 q_size += if (t2count[other_index2 as usize] >> 2) == 2 {
324 1
325 } else {
326 0
327 };
328 t2count[other_index2 as usize] -= 4;
329 t2count[other_index2 as usize] ^= binary_fuse_mod3(found + 2);
330 t2hash[other_index2 as usize] ^= hash;
331 }
332 }
333
334 if stack_size == size {
335 break; }
337
338 reverse_order.fill(0);
339 reverse_order[size] = 1; t2count.fill(0);
341 t2hash.fill(0);
342
343 self.seed = binary_fuse_rng_splitmix64(&mut rng_counter);
344 }
345
346 for i in (0_usize..size).rev() {
347 let hash: u64 = reverse_order[i];
349 let xor2: u16 = binary_fuse16_fingerprint(hash) as u16;
350 let found: usize = reverse_h[i] as usize;
351 h012[0] = self.binary_fuse16_hash(0, hash);
352 h012[1] = self.binary_fuse16_hash(1, hash);
353 h012[2] = self.binary_fuse16_hash(2, hash);
354 h012[3] = h012[0];
355 h012[4] = h012[1];
356
357 self.finger_prints[h012[found] as usize] = xor2
358 ^ self.finger_prints[h012[found + 1] as usize]
359 ^ self.finger_prints[h012[found + 2] as usize];
360 }
361
362 Ok(())
363 }
364}
365
366impl<H> Fuse16<H>
367where
368 H: BuildHasher,
369{
370 pub fn contains<K: ?Sized + Hash>(&self, key: &K) -> bool {
373 let digest = {
374 let mut hasher = self.hash_builder.build_hasher();
375 key.hash(&mut hasher);
376 hasher.finish()
377 };
378 self.contains_key(digest)
379 }
380
381 pub fn contains_key(&self, digest: u64) -> bool {
384 let hash = binary_fuse_mix_split(digest, self.seed);
385 let mut f = binary_fuse16_fingerprint(hash) as u16;
386 let BinaryHashes { h0, h1, h2 } = self.binary_fuse16_hash_batch(hash);
387 f ^= self.finger_prints[h0 as usize]
388 ^ self.finger_prints[h1 as usize]
389 ^ self.finger_prints[h2 as usize];
390 f == 0
391 }
392
393 #[allow(dead_code)]
394 fn get_hasher(&self) -> H::Hasher {
395 self.hash_builder.build_hasher()
396 }
397}
398
399#[cfg(test)]
400#[path = "fuse16_test.rs"]
401mod fuse16_test;