generic_bloom/
simple_filter.rs

1// This file is part of generic-bloom.
2//
3// generic-bloom is free software: you can redistribute it and/or
4// modify it under the terms of the GNU Affero General Public License
5// as published by the Free Software Foundation, either version 3 of
6// the License, or (at your option) any later version.
7//
8// generic-bloom is distributed in the hope that it will be useful,
9// but WITHOUT ANY WARRANTY; without even the implied warranty of
10// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11// Affero General Public License for more details.  You should have
12// received a copy of the GNU Affero General Public License along with
13// generic-bloom. If not, see <https://www.gnu.org/licenses/>.
14
15use std::collections::hash_map::RandomState;
16use std::hash::{BuildHasher, Hash, Hasher};
17use std::iter::{FromIterator, Extend};
18use crate::traits::set::*;
19use crate::traits::filter::*;
20use std::rc::Rc;
21use std::marker::PhantomData;
22
23#[derive(Debug, Clone, PartialEq)]
24/// A Bloom filter with underlying set `B` and [`BuildHasher`] type
25/// `S`, the `BuildHasher`s being held in a collection of type
26/// `V`. The supported operations are based on the traits implemented
27/// by `B`.
28pub struct SimpleBloomFilter<B, S = RandomState, V = Rc<[S]>>
29where
30    V: AsRef<[S]>,
31{
32    hashers: V,
33    set: B,
34    _phantom: PhantomData<S>
35}
36
37impl<B, S, V> SimpleBloomFilter<B, S, V>
38where
39    B: BloomSet,
40    S: BuildHasher,
41    V: AsRef<[S]>,
42{
43    /// Creates a new `SimpleBloomFilter` with a specified number of counters
44    /// and [`BuildHasher`]s. The `BuildHasher`s will be initialized by
45    /// [`default`](Default::default).
46    pub fn new(n_hashers: usize, n_counters: usize) -> Self
47    where
48        S: Default,
49        V: FromIterator<S>,
50    {
51        SimpleBloomFilter::with_hashers(
52            std::iter::repeat_with(|| S::default())
53                .take(n_hashers)
54                .collect(),
55            n_counters,
56        )
57    }
58
59    /// Creates a new `SimpleBloomFilter` with specified `BuildHasher`s and a
60    /// specified number of counters.
61    pub fn with_hashers(hashers: V, n_counters: usize) -> Self {
62        debug_assert!(hashers.as_ref().len() > 0);
63        SimpleBloomFilter {
64            hashers: hashers,
65            set: B::new(n_counters),
66            _phantom: PhantomData
67        }
68    }
69
70    /// Returns the hashers and bit set of the filter.
71    pub fn into_inner(self) -> (V, B) {
72        (self.hashers, self.set)
73    }
74
75    pub fn hashers(&self) -> &V {
76        &self.hashers
77    }
78
79    fn hash_indices<'a, T: Hash>(
80        hashers: &'a V,
81        set_size: usize,
82        val: &'a T,
83    ) -> impl Iterator<Item = usize> + 'a
84    where S: 'a {
85        hashers.as_ref().iter().map(move |b| {
86            let mut h = b.build_hasher();
87            val.hash(&mut h);
88            h.finish() as usize % set_size
89        })
90    }
91}
92
93impl<B, S, V> BloomFilter for SimpleBloomFilter<B, S, V>
94where
95    B: BloomSet,
96    S: BuildHasher,
97    V: AsRef<[S]>,
98{
99    type Set = B;
100    type Hasher = S;
101
102    fn counters(&self) -> &B {
103        return &self.set;
104    }
105
106    fn insert<T: Hash>(&mut self, val: &T) {
107        for i in Self::hash_indices(&self.hashers, self.set.size(), val) {
108            self.set.increment(i);
109        }
110    }
111
112    fn contains<T: Hash>(&self, val: &T) -> bool {
113        for i in Self::hash_indices(&self.hashers, self.set.size(), val) {
114            if !self.set.query(i) {
115                return false;
116            }
117        }
118
119        true
120    }
121
122    fn clear(&mut self) {
123        self.set.clear()
124    }
125}
126
127impl<B, S, V> BloomFilterDelete for SimpleBloomFilter<B, S, V>
128where
129    B: BloomSetDelete,
130    S: BuildHasher,
131    V: AsRef<[S]>,
132{
133    fn remove<T: Hash>(&mut self, val: &T) {
134        for i in Self::hash_indices(&self.hashers, self.set.size(), val) {
135            self.set.decrement(i);
136        }
137    }
138}
139
140impl<B, S, V> BinaryBloomFilter for SimpleBloomFilter<B, S, V>
141where
142    B: BinaryBloomSet,
143    S: BuildHasher,
144    V: AsRef<[S]>,
145{
146    fn union<Other>(&mut self, other: &Other)
147    where
148        Other: BinaryBloomFilter<Set = Self::Set, Hasher = Self::Hasher>
149    {
150        self.set.union(&other.counters());
151    }
152
153    fn intersect<Other>(&mut self, other: &Other)
154    where
155        Other: BinaryBloomFilter<Set = Self::Set, Hasher = Self::Hasher>
156    {
157        self.set.intersect(&other.counters());
158    }
159}
160
161impl<B, S, V> SpectralBloomFilter for SimpleBloomFilter<B, S, V>
162where
163    B: SpectralBloomSet,
164    B::Count: Ord,
165    S: BuildHasher,
166    V: AsRef<[S]>,
167{
168    fn contains_more_than<T: Hash>(
169        &self,
170        val: &T,
171        count: &<B as SpectralBloomSet>::Count,
172    ) -> bool {
173        for i in Self::hash_indices(&self.hashers, self.set.size(), val) {
174            if *self.set.query_count(i) <= *count {
175                return false;
176            }
177        }
178
179        true
180    }
181
182    fn find_count<T: Hash>(&self, val: &T) -> &<B as SpectralBloomSet>::Count {
183        Self::hash_indices(&self.hashers, self.set.size(), val)
184            .map(|i| self.set.query_count(i))
185            .min()
186            .unwrap()
187    }
188}
189
190impl<A: Hash, B, S, V> Extend<A> for SimpleBloomFilter<B, S, V>
191where
192    B: BloomSet,
193    S: BuildHasher,
194    V: AsRef<[S]>,
195{
196    fn extend<T>(&mut self, iter: T)
197    where
198        T: IntoIterator<Item = A>
199    {
200        for val in iter {
201            self.insert(&val);
202        }
203    }
204}