stable_bloom_filter/
stable.rs1use crate::buckets::Buckets;
2use crate::fnv::FnvHasher;
3use crate::Filter;
4use crate::{optimal_k, optimal_stable_p};
5use rand::{thread_rng, Rng};
6use std::hash::Hasher;
7
8pub struct StableBloomFilter {
9 cells: Buckets,
11 hash: FnvHasher,
13 m: usize,
15 p: usize,
17 k: usize,
19 max: u8,
21 index_buffer: Vec<usize>,
23}
24
25impl StableBloomFilter {
26 pub fn new(m: usize, d: u8, fp_rate: f64) -> Self {
30 let mut k = optimal_k(fp_rate) / 2;
31 if k > m {
32 k = m;
33 } else if k == 0 {
34 k = 1;
35 }
36
37 let cells = Buckets::new(m, d);
38
39 StableBloomFilter {
40 hash: FnvHasher::default(),
41 m,
42 k,
43 p: optimal_stable_p(m, k, d, fp_rate),
44 max: cells.max_bucket_value(),
45 cells,
46 index_buffer: vec![0; k],
47 }
48 }
49
50 pub fn new_default(m: usize, fp_rate: f64) -> Self {
55 Self::new(m, 1, fp_rate)
56 }
57
58 pub fn new_unstable(m: usize, fp_rate: f64) -> Self {
63 let cells = Buckets::new(m, 1);
64 let k = optimal_k(fp_rate);
65
66 StableBloomFilter {
67 hash: FnvHasher::default(),
68 m,
69 k,
70 p: 0,
71 max: cells.max_bucket_value(),
72 cells,
73 index_buffer: vec![0; k],
74 }
75 }
76
77 pub fn cells(&self) -> usize {
79 self.m
80 }
81
82 pub fn k(&self) -> usize {
84 self.k
85 }
86
87 pub fn p(&self) -> usize {
89 self.p
90 }
91
92 pub fn max(&self) -> u8 {
93 self.max
94 }
95
96 pub fn stable_point(&self) -> f64 {
100 let sub_denom = (self.p as f64) * ((1.0 / (self.k as f64)) - (1.0 / (self.m as f64)));
101 let denom = 1.0 + 1.0 / sub_denom;
102 let base = 1.0 / denom;
103
104 base.powf(f64::from(self.max))
105 }
106
107 pub fn false_positive_rate(&self) -> f64 {
110 (1.0 - self.stable_point()).powf(self.k as f64)
111 }
112
113 pub fn hash_kernel(&self, data: &[u8]) -> (u32, u32) {
114 let mut hasher = self.hash.clone();
115 hasher.write(data);
116 let hash: u64 = hasher.finish();
117 let lower = hash as u32;
118 let upper = (hash >> 32) as u32;
119 (lower, upper)
120 }
121
122 pub fn reset(&mut self) -> &Self {
125 self.cells.reset();
126 self
127 }
128
129 pub fn decrement(&mut self) {
134 let mut rng = thread_rng();
135 let r: usize = rng.gen_range(0, self.m);
136
137 for i in 0..(self.p) {
138 let idx = (r + i) % self.m;
139 self.cells.decrease(idx, 1);
140 }
141 }
142}
143
144impl Filter for StableBloomFilter {
145 fn test(&self, data: &[u8]) -> bool {
149 let (lower, upper) = self.hash_kernel(data);
150 for i in 0..(self.k) {
151 if self
152 .cells
153 .get((lower as usize + upper as usize * i) % self.m)
154 == 0
155 {
156 return false;
157 }
158 }
159 true
160 }
161
162 fn add(&mut self, data: &[u8]) -> &Self {
165 self.decrement();
167 let (lower, upper) = self.hash_kernel(data);
168
169 for i in 0..(self.k) {
170 self.cells
171 .set((lower as usize + upper as usize * i) % self.m, self.max);
172 }
173
174 self
175 }
176
177 fn test_and_add(&mut self, data: &[u8]) -> bool {
180 let (lower, upper) = self.hash_kernel(data);
181 let mut member = true;
182
183 for i in 0..(self.k) {
185 self.index_buffer[i] = (lower as usize + upper as usize * i) % self.m;
186 if self.cells.get(self.index_buffer[i]) == 0 {
187 member = false;
188 }
189 }
190
191 self.decrement();
193 for i in self.index_buffer.iter() {
195 self.cells.set(*i, self.max);
196 }
197
198 member
199 }
200}
201
202#[cfg(test)]
203mod tests {
204 use super::StableBloomFilter;
205 use crate::optimal_k;
206 use crate::Filter;
207 use float_cmp::ApproxEq;
208 use std::f64;
209
210 fn round(val: f64, round_on: f64, places: usize) -> f64 {
211 let pow = (10.0_f64).powf(places as f64);
212 let digit = pow * val;
213 let div = digit - digit.floor();
214 let round = if div >= round_on {
215 digit.ceil()
216 } else {
217 digit.floor()
218 };
219
220 round / pow
221 }
222
223 #[test]
226 fn test_new_unstable() {
227 let f = StableBloomFilter::new_unstable(100, 0.1);
228 let k = optimal_k(0.1);
229
230 assert_eq!(f.k, k);
231 assert_eq!(f.m, 100);
232 assert_eq!(f.p(), 0);
233 assert_eq!(f.max(), 1);
234 }
235
236 #[test]
239 fn test_cells() {
240 let f = StableBloomFilter::new(100, 1, 0.1);
241
242 assert_eq!(f.cells(), 100);
243 }
244
245 #[test]
248 fn test_k() {
249 let f = StableBloomFilter::new(100, 1, 0.01);
250 assert_eq!(f.k(), 3);
251 }
252
253 #[test]
255 fn test_test_and_add() {
256 let mut f = StableBloomFilter::new_default(1_000, 0.01);
257 assert!(!f.test(b"a"));
258
259 f.add(b"a");
260 assert!(f.test(b"a"));
261
262 assert!(f.test_and_add(b"a"));
263
264 assert!(!f.test_and_add(b"b"));
265 assert!(f.test(b"a"));
266
267 assert!(f.test(b"b"));
268
269 assert!(!f.test(b"c"));
270
271 for i in 0..1_000_000 {
272 f.test_and_add(i.to_string().as_bytes());
273 }
274
275 assert!(!f.test(b"a"));
277 }
278
279 #[test]
282 fn test_stable_point() {
283 let mut f = StableBloomFilter::new(1000, 1, 0.1);
284 for i in 0..1_000_000 {
285 f.add(i.to_string().as_bytes());
286 }
287
288 let mut zero = 0;
289 for i in 0..(f.m) {
290 if f.cells.get(i) == 0 {
291 zero += 1;
292 }
293 }
294
295 let actual = round(f64::from(zero) / (f.m as f64), 0.5, 1);
296 let expected = round(f.stable_point(), 0.5, 1);
297
298 assert!(actual.approx_eq(expected, (f64::EPSILON, 1)));
299 let bf = StableBloomFilter::new_unstable(1000, 0.1);
302 assert!(bf.stable_point().approx_eq(0.0, (f64::EPSILON, 1)));
303 }
304
305 #[test]
308 fn test_false_positive_rate() {
309 let f = StableBloomFilter::new_default(1000, 0.01);
310 let fps = round(f.false_positive_rate(), 0.5, 2);
311
312 assert!(fps.approx_eq(0.01, (f64::EPSILON, 1)));
313
314 let bf = StableBloomFilter::new_unstable(1000, 0.1);
317 assert!(bf.false_positive_rate().approx_eq(1.0, (f64::EPSILON, 1)));
318 }
319
320 #[test]
322 fn test_reset() {
323 let mut f = StableBloomFilter::new_default(1000, 0.01);
324
325 for i in 0..1000 {
326 f.add(i.to_string().as_bytes());
327 }
328
329 f.reset();
330
331 for i in 0..(f.m) {
332 assert_eq!(f.cells.get(i), 0);
333 }
334 }
335}