1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
use std::collections::hash_map::DefaultHasher;
use std::hash::{BuildHasher, Hash, Hasher};

// Calculates the optimal number of hash functions to use for a Bloom
// filter based on the desired rate of false positives.
pub fn compute_k_num(fp_rate: f64) -> usize {
    debug_assert!(fp_rate > 0.0 && fp_rate < 1.0);
    fp_rate.log2().abs().ceil() as usize
}

/// A trait for creating hash iterator of item.
pub trait HashKernels {
    type HI: Iterator<Item = usize>;

    fn hash_iter<T: Hash>(&self, item: &T) -> Self::HI;
}

/// A trait for creating instances of [`HashKernels`].
pub trait BuildHashKernels
where
    Self: Sized,
{
    type HK: HashKernels;

    fn with_fp_rate(self, fp_rate: f64, n: usize) -> Self::HK {
        self.with_k(compute_k_num(fp_rate), n)
    }

    fn with_k(self, k: usize, n: usize) -> Self::HK;
}

/// Used to create a DefaultHashKernels instance.
pub struct DefaultBuildHashKernels<BH> {
    hash_seed: usize,
    build_hasher: BH,
}

impl<BH: BuildHasher> DefaultBuildHashKernels<BH> {
    pub fn new(hash_seed: usize, build_hasher: BH) -> Self {
        Self { hash_seed, build_hasher }
    }
}

impl<BH: BuildHasher> BuildHashKernels for DefaultBuildHashKernels<BH> {
    type HK = DefaultHashKernels<BH>;

    fn with_k(self, k: usize, n: usize) -> Self::HK {
        Self::HK {
            k,
            n,
            hash_seed: self.hash_seed,
            build_hasher: self.build_hasher,
        }
    }
}

/// A default implementation of [Kirsch-Mitzenmacher-Optimization](https://www.eecs.harvard.edu/~michaelm/postscripts/tr-02-05.pdf) hash function
pub struct DefaultHashKernels<BH> {
    k: usize,         // numbers of hash iterating
    n: usize,         // filter size
    hash_seed: usize, // seed offset for anonymity and privacy purpose
    build_hasher: BH,
}

impl<BH: BuildHasher> HashKernels for DefaultHashKernels<BH> {
    type HI = DefaultHashIter;

    fn hash_iter<T: Hash>(&self, item: &T) -> Self::HI {
        let hasher = &mut self.build_hasher.build_hasher();
        item.hash(hasher);
        let result = hasher.finish();

        DefaultHashIter::new(result, self.k, self.n, self.hash_seed)
    }
}

pub struct DefaultHashIter {
    h1: usize,
    h2: usize,
    k: usize,
    n: usize,
    hash_seed: usize,
    counter: usize,
}

impl DefaultHashIter {
    fn new(hash: u64, k: usize, n: usize, hash_seed: usize) -> Self {
        Self {
            h1: (hash as u32) as usize,
            h2: (hash >> 32) as usize,
            k,
            n,
            hash_seed,
            counter: 0,
        }
    }
}

impl Iterator for DefaultHashIter {
    type Item = usize;

    fn next(&mut self) -> Option<usize> {
        if self.k == self.counter {
            return None;
        }
        let r = self
            .hash_seed
            .wrapping_add(self.h1)
            .wrapping_add(self.h2.wrapping_mul(self.counter))
            .wrapping_rem(self.n);
        self.counter += 1;
        Some(r)
    }
}

pub struct DefaultBuildHasher;

impl BuildHasher for DefaultBuildHasher {
    type Hasher = DefaultHasher;

    fn build_hasher(&self) -> DefaultHasher {
        DefaultHasher::new()
    }
}