bloom_filters/
stable.rs

1use crate::buckets::Buckets;
2use crate::hash::compute_k_num;
3use crate::{BloomFilter, BuildHashKernels, HashKernels};
4use rand::random;
5use std::hash::Hash;
6
7pub struct Filter<BHK: BuildHashKernels> {
8    buckets: Buckets,      // filter data
9    hash_kernels: BHK::HK, // hash kernels
10    p: usize,              // number of buckets to decrement,
11}
12
13impl<BHK: BuildHashKernels> Filter<BHK> {
14    /// Creates a new Stable Bloom Filter with m buckets and d
15    /// bits allocated per bucket optimized for the target false-positive rate.
16    pub fn new(m: usize, d: u8, fp_rate: f64, build_hash_kernels: BHK) -> Self {
17        let mut k = compute_k_num(fp_rate);
18        if k > m {
19            k = m
20        } else if k == 0 {
21            k = 1
22        }
23
24        let buckets = Buckets::new(m, d);
25        let hash_kernels = build_hash_kernels.with_k(k, buckets.len());
26
27        Self {
28            buckets,
29            hash_kernels,
30            p: compute_p_num(m, k, d, fp_rate),
31        }
32    }
33
34    fn decrement(&mut self) {
35        let r: usize = random();
36        (0..self.p).for_each(|i| {
37            let bucket = (r + i) % self.buckets.len();
38            self.buckets.increment(bucket, -1)
39        })
40    }
41}
42
43// returns the optimal number of buckets to decrement, p, per
44// iteration for the provided parameters of an SBF.
45fn compute_p_num(m: usize, k: usize, d: u8, fp_rate: f64) -> usize {
46    let (m, k, d) = (m as f64, k as f64, f64::from(d));
47    let max = 2f64.powf(d) - 1.0;
48    let sub_denom = (1.0 - fp_rate.powf(1.0 / k)).powf(1.0 / max);
49    let denom = (1.0 / sub_denom - 1.0) * (1.0 / k - 1.0 / m);
50    let p = (1.0 / denom).ceil();
51    if p <= 0.0 {
52        1
53    } else {
54        p as usize
55    }
56}
57
58impl<BHK: BuildHashKernels> BloomFilter for Filter<BHK> {
59    fn insert<T: Hash>(&mut self, item: &T) {
60        self.decrement();
61        let max = self.buckets.max_value();
62        self.hash_kernels.hash_iter(item).for_each(|i| self.buckets.set(i, max))
63    }
64
65    fn contains<T: Hash>(&self, item: &T) -> bool {
66        self.hash_kernels.hash_iter(item).all(|i| self.buckets.get(i) > 0)
67    }
68
69    fn reset(&mut self) {
70        self.buckets.reset()
71    }
72}
73
74#[cfg(test)]
75mod tests {
76    use super::*;
77    use crate::hash::DefaultBuildHashKernels;
78    use proptest::{collection::size_range, prelude::any_with, proptest};
79    use rand::random;
80    use std::collections::hash_map::RandomState;
81
82    fn _contains(items: &[usize]) {
83        // d = 3, max = (1 << d) - 1
84        let mut filter = Filter::new(100, 3, 0.03, DefaultBuildHashKernels::new(random(), RandomState::new()));
85        assert!(items.iter().all(|i| !filter.contains(i)));
86        items.iter().for_each(|i| filter.insert(i));
87        assert!(items.iter().all(|i| filter.contains(i)));
88    }
89
90    proptest! {
91        #[test]
92        fn contains(ref items in any_with::<Vec<usize>>(size_range(7).lift())) {
93            _contains(items)
94        }
95    }
96}