bloom_filters/
hash.rs

1use std::collections::hash_map::DefaultHasher;
2use std::hash::{BuildHasher, Hash, Hasher};
3
4// Calculates the optimal number of hash functions to use for a Bloom
5// filter based on the desired rate of false positives.
6pub fn compute_k_num(fp_rate: f64) -> usize {
7    debug_assert!(fp_rate > 0.0 && fp_rate < 1.0);
8    fp_rate.log2().abs().ceil() as usize
9}
10
11/// A trait for creating hash iterator of item.
12pub trait HashKernels {
13    type HI: Iterator<Item = usize>;
14
15    fn hash_iter<T: Hash>(&self, item: &T) -> Self::HI;
16}
17
18/// A trait for creating instances of [`HashKernels`].
19pub trait BuildHashKernels
20where
21    Self: Sized,
22{
23    type HK: HashKernels;
24
25    fn with_fp_rate(self, fp_rate: f64, n: usize) -> Self::HK {
26        self.with_k(compute_k_num(fp_rate), n)
27    }
28
29    fn with_k(self, k: usize, n: usize) -> Self::HK;
30}
31
32/// Used to create a DefaultHashKernels instance.
33pub struct DefaultBuildHashKernels<BH> {
34    hash_seed: usize,
35    build_hasher: BH,
36}
37
38impl<BH: BuildHasher> DefaultBuildHashKernels<BH> {
39    pub fn new(hash_seed: usize, build_hasher: BH) -> Self {
40        Self { hash_seed, build_hasher }
41    }
42}
43
44impl<BH: BuildHasher> BuildHashKernels for DefaultBuildHashKernels<BH> {
45    type HK = DefaultHashKernels<BH>;
46
47    fn with_k(self, k: usize, n: usize) -> Self::HK {
48        Self::HK {
49            k,
50            n,
51            hash_seed: self.hash_seed,
52            build_hasher: self.build_hasher,
53        }
54    }
55}
56
57/// A default implementation of [Kirsch-Mitzenmacher-Optimization](https://www.eecs.harvard.edu/~michaelm/postscripts/tr-02-05.pdf) hash function
58pub struct DefaultHashKernels<BH> {
59    k: usize,         // numbers of hash iterating
60    n: usize,         // filter size
61    hash_seed: usize, // seed offset for anonymity and privacy purpose
62    build_hasher: BH,
63}
64
65impl<BH: BuildHasher> HashKernels for DefaultHashKernels<BH> {
66    type HI = DefaultHashIter;
67
68    fn hash_iter<T: Hash>(&self, item: &T) -> Self::HI {
69        let hasher = &mut self.build_hasher.build_hasher();
70        item.hash(hasher);
71        let result = hasher.finish();
72
73        DefaultHashIter::new(result, self.k, self.n, self.hash_seed)
74    }
75}
76
77pub struct DefaultHashIter {
78    h1: usize,
79    h2: usize,
80    k: usize,
81    n: usize,
82    hash_seed: usize,
83    counter: usize,
84}
85
86impl DefaultHashIter {
87    fn new(hash: u64, k: usize, n: usize, hash_seed: usize) -> Self {
88        Self {
89            h1: (hash as u32) as usize,
90            h2: (hash >> 32) as usize,
91            k,
92            n,
93            hash_seed,
94            counter: 0,
95        }
96    }
97}
98
99impl Iterator for DefaultHashIter {
100    type Item = usize;
101
102    fn next(&mut self) -> Option<usize> {
103        if self.k == self.counter {
104            return None;
105        }
106        let r = self
107            .hash_seed
108            .wrapping_add(self.h1)
109            .wrapping_add(self.h2.wrapping_mul(self.counter))
110            .wrapping_rem(self.n);
111        self.counter += 1;
112        Some(r)
113    }
114}
115
116pub struct DefaultBuildHasher;
117
118impl BuildHasher for DefaultBuildHasher {
119    type Hasher = DefaultHasher;
120
121    fn build_hasher(&self) -> DefaultHasher {
122        DefaultHasher::new()
123    }
124}