1extern crate fnv;
8
9use fnv::FnvHasher;
10use std::hash::Hasher;
11
12const KEY_SIZE: usize = 12;
13const ARRAY_SIZE: usize = 1 << KEY_SIZE;
14const KEY_MASK: u32 = (1 << KEY_SIZE) - 1;
15const KEY_SHIFT: usize = 16;
16
17pub struct BloomFilter {
64 counters: [u8; ARRAY_SIZE],
65}
66
67impl Clone for BloomFilter {
68 #[inline]
69 fn clone(&self) -> BloomFilter {
70 BloomFilter {
71 counters: self.counters,
72 }
73 }
74}
75
76impl BloomFilter {
77 #[inline]
79 pub fn new() -> BloomFilter {
80 BloomFilter {
81 counters: [0; ARRAY_SIZE],
82 }
83 }
84
85 #[inline]
86 fn first_slot(&self, hash: u32) -> &u8 {
87 &self.counters[hash1(hash) as usize]
88 }
89
90 #[inline]
91 fn first_mut_slot(&mut self, hash: u32) -> &mut u8 {
92 &mut self.counters[hash1(hash) as usize]
93 }
94
95 #[inline]
96 fn second_slot(&self, hash: u32) -> &u8 {
97 &self.counters[hash2(hash) as usize]
98 }
99
100 #[inline]
101 fn second_mut_slot(&mut self, hash: u32) -> &mut u8 {
102 &mut self.counters[hash2(hash) as usize]
103 }
104
105 #[inline]
106 pub fn clear(&mut self) {
107 self.counters = [0; ARRAY_SIZE]
108 }
109
110 #[inline]
111 fn insert_hash(&mut self, hash: u32) {
112 {
113 let slot1 = self.first_mut_slot(hash);
114 if !full(slot1) {
115 *slot1 += 1
116 }
117 }
118 {
119 let slot2 = self.second_mut_slot(hash);
120 if !full(slot2) {
121 *slot2 += 1
122 }
123 }
124 }
125
126 #[inline]
128 pub fn insert<T:BloomHash>(&mut self, elem: &T) {
129 self.insert_hash(elem.bloom_hash())
130
131 }
132
133 #[inline]
134 fn remove_hash(&mut self, hash: u32) {
135 {
136 let slot1 = self.first_mut_slot(hash);
137 if !full(slot1) {
138 *slot1 -= 1
139 }
140 }
141 {
142 let slot2 = self.second_mut_slot(hash);
143 if !full(slot2) {
144 *slot2 -= 1
145 }
146 }
147 }
148
149 #[inline]
151 pub fn remove<T:BloomHash>(&mut self, elem: &T) {
152 self.remove_hash(elem.bloom_hash())
153 }
154
155 #[inline]
156 fn might_contain_hash(&self, hash: u32) -> bool {
157 *self.first_slot(hash) != 0 && *self.second_slot(hash) != 0
158 }
159
160 #[inline]
165 pub fn might_contain<T:BloomHash>(&self, elem: &T) -> bool {
166 self.might_contain_hash(elem.bloom_hash())
167 }
168}
169
170pub trait BloomHash {
171 fn bloom_hash(&self) -> u32;
172}
173
174impl BloomHash for u64 {
175 #[inline]
176 fn bloom_hash(&self) -> u32 {
177 ((*self >> 32) ^ *self) as u32
178 }
179}
180
181impl BloomHash for isize {
182 #[allow(exceeding_bitshifts)]
183 #[inline]
184 fn bloom_hash(&self) -> u32 {
185 ((*self >> 32) ^ *self) as u32
186 }
187}
188
189impl BloomHash for usize {
190 #[allow(exceeding_bitshifts)]
191 #[inline]
192 fn bloom_hash(&self) -> u32 {
193 ((*self >> 32) ^ *self) as u32
194 }
195}
196
197impl BloomHash for String {
198 #[inline]
199 fn bloom_hash(&self) -> u32 {
200 let mut h = FnvHasher::default();
201 h.write(self.as_bytes());
202 h.finish().bloom_hash()
203 }
204}
205
206#[inline]
207fn full(slot: &u8) -> bool {
208 *slot == 0xff
209}
210
211#[inline]
212fn hash1(hash: u32) -> u32 {
213 hash & KEY_MASK
214}
215
216#[inline]
217fn hash2(hash: u32) -> u32 {
218 (hash >> KEY_SHIFT) & KEY_MASK
219}
220
221#[test]
222fn create_and_insert_some_stuff() {
223 let mut bf = BloomFilter::new();
224
225 for i in 0_usize .. 1000 {
226 bf.insert(&i);
227 }
228
229 for i in 0_usize .. 1000 {
230 assert!(bf.might_contain(&i));
231 }
232
233 let false_positives =
234 (1001_usize .. 2000).filter(|i| bf.might_contain(i)).count();
235
236 assert!(false_positives < 10); for i in 0_usize .. 100 {
239 bf.remove(&i);
240 }
241
242 for i in 100_usize .. 1000 {
243 assert!(bf.might_contain(&i));
244 }
245
246 let false_positives = (0_usize .. 100).filter(|i| bf.might_contain(i)).count();
247
248 assert!(false_positives < 2); bf.clear();
251
252 for i in 0_usize .. 2000 {
253 assert!(!bf.might_contain(&i));
254 }
255}
256
257#[cfg(test)]
258mod bench {
259 extern crate test;
260
261 use std::hash::{Hash, Hasher, SipHasher};
262 use super::BloomFilter;
263
264 #[bench]
265 fn create_insert_1000_remove_100_lookup_100(b: &mut test::Bencher) {
266 b.iter(|| {
267 let mut bf = BloomFilter::new();
268 for i in 0_usize .. 1000 {
269 bf.insert(&i);
270 }
271 for i in 0_usize .. 100 {
272 bf.remove(&i);
273 }
274 for i in 100_usize .. 200 {
275 test::black_box(bf.might_contain(&i));
276 }
277 });
278 }
279
280 #[bench]
281 fn might_contain(b: &mut test::Bencher) {
282 let mut bf = BloomFilter::new();
283
284 for i in 0_usize .. 1000 {
285 bf.insert(&i);
286 }
287
288 let mut i = 0_usize;
289
290 b.bench_n(1000, |b| {
291 b.iter(|| {
292 test::black_box(bf.might_contain(&i));
293 i += 1;
294 });
295 });
296 }
297
298 #[bench]
299 fn insert(b: &mut test::Bencher) {
300 let mut bf = BloomFilter::new();
301
302 b.bench_n(1000, |b| {
303 let mut i = 0_usize;
304
305 b.iter(|| {
306 test::black_box(bf.insert(&i));
307 i += 1;
308 });
309 });
310 }
311
312 #[bench]
313 fn remove(b: &mut test::Bencher) {
314 let mut bf = BloomFilter::new();
315 for i in 0_usize .. 1000 {
316 bf.insert(&i);
317 }
318
319 b.bench_n(1000, |b| {
320 let mut i = 0_usize;
321
322 b.iter(|| {
323 bf.remove(&i);
324 i += 1;
325 });
326 });
327
328 test::black_box(bf.might_contain(&0_usize));
329 }
330
331 #[bench]
332 fn hash_a_uint(b: &mut test::Bencher) {
333 let mut i = 0_usize;
334 b.iter(|| {
335 let mut hasher = SipHasher::default();
336 i.hash(&mut hasher);
337 test::black_box(hasher.finish());
338 i += 1;
339 })
340 }
341}