bloom_filters/
classic.rs

1use crate::buckets::Buckets;
2use crate::{BloomFilter, BuildHashKernels, HashKernels, UpdatableBloomFilter};
3use std::hash::Hash;
4
5pub struct Filter<BHK: BuildHashKernels> {
6    buckets: Buckets,      // filter data
7    hash_kernels: BHK::HK, // hash kernels
8}
9
10impl<BHK: BuildHashKernels> Filter<BHK> {
11    /// Create a new bloom filter structure.
12    /// items_count is an estimation of the maximum number of items to store.
13    /// fp_rate is the wanted rate of false positives, in ]0.0, 1.0[
14    pub fn new(items_count: usize, fp_rate: f64, build_hash_kernels: BHK) -> Self {
15        let buckets = Buckets::with_fp_rate(items_count, fp_rate, 1);
16        let hash_kernels = build_hash_kernels.with_fp_rate(fp_rate, buckets.len());
17        Self { buckets, hash_kernels }
18    }
19
20    pub fn with_raw_data(raw_data: &[u8], k: usize, build_hash_kernels: BHK) -> Self {
21        let buckets = Buckets::with_raw_data(raw_data.len() * 8, 1, raw_data);
22        let hash_kernels = build_hash_kernels.with_k(k, buckets.len());
23        Self { buckets, hash_kernels }
24    }
25
26    pub fn buckets(&self) -> &Buckets {
27        &self.buckets
28    }
29}
30
31impl<BHK: BuildHashKernels> BloomFilter for Filter<BHK> {
32    fn insert<T: Hash>(&mut self, item: &T) {
33        self.hash_kernels.hash_iter(item).for_each(|i| self.buckets.set(i, 1))
34    }
35
36    fn contains<T: Hash>(&self, item: &T) -> bool {
37        self.hash_kernels.hash_iter(item).all(|i| self.buckets.get(i) == 1)
38    }
39
40    fn reset(&mut self) {
41        self.buckets.reset()
42    }
43}
44
45impl<BHK: BuildHashKernels> UpdatableBloomFilter for Filter<BHK> {
46    fn update(&mut self, raw_data: &[u8]) {
47        self.buckets.update(raw_data)
48    }
49}
50
51#[cfg(test)]
52mod tests {
53    use super::*;
54    use crate::hash::{DefaultBuildHashKernels, DefaultBuildHasher};
55    use proptest::{collection::size_range, prelude::any_with, proptest};
56    use rand::random;
57    use std::collections::hash_map::RandomState;
58
59    fn _contains(items: &[usize]) {
60        let mut filter = Filter::new(100, 0.03, DefaultBuildHashKernels::new(random(), RandomState::new()));
61        assert!(items.iter().all(|i| !filter.contains(i)));
62        items.iter().for_each(|i| filter.insert(i));
63        assert!(items.iter().all(|i| filter.contains(i)));
64    }
65
66    proptest! {
67        #[test]
68        fn contains(ref items in any_with::<Vec<usize>>(size_range(16).lift())) {
69            _contains(items)
70        }
71    }
72
73    fn _raw_data(items: &[usize]) {
74        let data = vec![0; 8];
75        let hash_seed = random();
76        let mut filter = Filter::with_raw_data(&data, 2, DefaultBuildHashKernels::new(hash_seed, DefaultBuildHasher));
77        assert!(items.iter().all(|i| !filter.contains(i)));
78        items.iter().for_each(|i| filter.insert(i));
79        let data = filter.buckets().raw_data();
80        let filter = Filter::with_raw_data(&data, 2, DefaultBuildHashKernels::new(hash_seed, DefaultBuildHasher));
81        assert!(items.iter().all(|i| filter.contains(i)));
82    }
83
84    proptest! {
85        #[test]
86        fn raw_data(ref items in any_with::<Vec<usize>>(size_range(8).lift())) {
87            _raw_data(items)
88        }
89    }
90
91    fn _update(items1: &[usize], items2: &[usize]) {
92        let data = vec![0; 8];
93        let hash_seed = random();
94
95        let mut filter1 = Filter::with_raw_data(&data, 2, DefaultBuildHashKernels::new(hash_seed, DefaultBuildHasher));
96        items1.iter().for_each(|i| filter1.insert(i));
97
98        let mut filter2 = Filter::with_raw_data(&data, 2, DefaultBuildHashKernels::new(hash_seed, DefaultBuildHasher));
99        items2.iter().for_each(|i| filter2.insert(i));
100
101        filter1.update(&filter2.buckets().raw_data());
102        assert!(items1.iter().all(|i| filter1.contains(i)));
103        assert!(items2.iter().all(|i| filter1.contains(i)));
104    }
105
106    proptest! {
107        #[test]
108        fn update(
109            ref items1 in any_with::<Vec<usize>>(size_range(8).lift()),
110            ref items2 in any_with::<Vec<usize>>(size_range(8).lift())
111        ) {
112            _update(items1, items2)
113        }
114    }
115}