1
2extern crate bit_vec;
3use bit_vec::BitVec;
4
5use std::hash::Hash;
6use std::hash::Hasher;
7use std::hash::BuildHasher;
8
9use std::collections::hash_map::RandomState;
10
11fn false_positive_rate(n_buckets: usize, n_hashers: usize, n_elems: usize)
18 -> f32
19{
20 let k = n_hashers as f32;
21 let n = n_elems as f32;
22 let m = n_buckets as f32;
23
24 (1. - ((-k * n) / m).exp()).powf(k)
25}
26
27fn min_n_buckets(n_elems: usize, fp_rate: f32) -> usize
28{
29 let n = n_elems as f32;
30
31 (-1. * n * fp_rate.ln() / (2f32.ln().powf(2.))).ceil() as usize
32}
33
34fn optimal_n_hashers(n_buckets: usize, n_elems: usize) -> usize
40{
41 let n = n_elems as f32;
42 let m = n_buckets as f32;
43
44 ((m / n) * 2f32.ln()).ceil() as usize
45}
46
47#[derive(Debug)]
49pub struct BloomFilter
50{
51 buffer: BitVec,
52 size: usize,
53 hashers: Vec<RandomState>
54}
55
56impl BloomFilter
57{
58 pub fn new_with_fp(n_elems: usize, fp_rate: f32) -> BloomFilter
64 {
65 let min_buckets = min_n_buckets(n_elems, fp_rate);
66 let n_hashers = optimal_n_hashers(min_buckets, n_elems);
67
68 BloomFilter {
69 size: 0,
70 buffer: BitVec::from_elem(min_buckets, false),
71 hashers: (0..n_hashers).map(|_| RandomState::new()).collect()
72 }
73 }
74
75 pub fn new_with_size(n_elems: usize, size: usize) -> BloomFilter
81 {
82 let n_hashers = optimal_n_hashers(size, n_elems);
83
84 BloomFilter {
85 size: 0,
86 buffer: BitVec::from_elem(size, false),
87 hashers: (0..n_hashers).map(|_| RandomState::new()).collect()
88 }
89 }
90
91 pub fn add<T>(&mut self, e: &T)
93 where T: Hash
94 {
95 for idx in self.indexes(e) {
96 self.buffer.set(idx, true);
97 }
98
99 self.size += 1;
100 }
101
102 pub fn may_contain<T>(&self, e: &T) -> bool
104 where T: Hash
105 {
106 let mut may_contain = true;
107
108 for idx in self.indexes(e) {
109 may_contain &= self.buffer.get(idx).unwrap();
110 }
111
112 may_contain
113 }
114
115 pub fn size(&self) -> usize
117 {
118 self.size
119 }
120
121 pub fn buckets(&self) -> usize
123 {
124 self.buffer.capacity()
125 }
126
127 pub fn n_hashers(&self) -> usize
129 {
130 self.hashers.len()
131 }
132
133 pub fn fp_rate(&self) -> f32
135 {
136 false_positive_rate(self.buckets(), self.n_hashers(), self.size())
137 }
138
139 fn indexes<T>(&self, e: &T) -> Vec<usize>
141 where T: Hash
142 {
143 let mut idxs = vec![];
144 for h in &self.hashers {
145 let mut hasher = h.build_hasher();
146 e.hash(&mut hasher);
147 idxs.push(hasher.finish() as usize % self.buffer.len());
148 }
149 idxs
150 }
151}
152
153#[cfg(test)]
154mod test
155{
156 use super::*;
157
158 #[test]
160 fn test_is_deterministic()
161 {
162 let to_add = "do add this";
163 let dont_add = 123;
164 let mut filter = BloomFilter::new_with_size(1, 100);
165 filter.add(&to_add);
166
167 assert_eq!(true, filter.may_contain(&to_add));
170 assert_eq!(true, filter.may_contain(&to_add));
171
172 assert_eq!(false, filter.may_contain(&dont_add));
173 assert_eq!(false, filter.may_contain(&dont_add));
174
175 }
176
177 #[test]
178 fn test_size_increments()
179 {
180 let to_add = "do add this";
181
182 let mut filter = BloomFilter::new_with_size(3, 100);
183 filter.add(&to_add);
184 filter.add(&to_add);
185 filter.add(&to_add);
186
187 assert_eq!(3, filter.size());
188 }
189
190 #[test]
191 fn test_fp_rate_is_zero_no_elems()
192 {
193 let filter = BloomFilter::new_with_size(100, 100);
194 assert_eq!(0.0, filter.fp_rate());
195 }
196}