1use std::cmp::min;
2use std::fs::{File, OpenOptions};
3use std::fs;
4use std::io::{Write, Read};
5use std::ptr::slice_from_raw_parts;
6
7use xxhash_rust::xxh3::xxh3_64_with_seed;
8
9use crate::{Deletable, Hashes, Membership};
10use crate::builder::FilterBuilder;
11use crate::vec::{BloomBitVec, CountingVec};
12
13#[inline]
14fn bit_set(bit_set: &mut BloomBitVec, value: &[u8], m: u64, k: u64) {
15 let hash1 = xxh3_64_with_seed(value, 0) % m;
19 let hash2 = xxh3_64_with_seed(value, 32) % m;
20
21 let m = m as u64;
22 for i in 1..k {
23 let mo = ((hash1 + i * hash2) % m) as usize;
24 bit_set.set(mo);
25 };
26 bit_set.set(hash1 as usize);
27}
28
29#[inline]
30fn bit_check(bit_set: &BloomBitVec, value: &[u8], m: u64, k: u64) -> bool {
31 let hash1 = xxh3_64_with_seed(value, 0) % m;
34 let hash2 = xxh3_64_with_seed(value, 32) % m;
35 let mut res = bit_set.get(hash1 as usize);
36 if !res { return false; }
37 for i in 1..k {
39 let mo = ((hash1 + i * hash2) % m) as usize;
40 res = res && bit_set.get(mo);
41 if !res { return false; }
42 }
43 res
44}
45
46#[inline]
47fn bit_check_and_set(bit_set: &mut BloomBitVec, value: &[u8], m: u64, k: u64) -> bool {
48 let hash1 = xxh3_64_with_seed(value, 0) % m;
51 let hash2 = xxh3_64_with_seed(value, 32) % m;
52 let mut res = bit_set.get(hash1 as usize);
53 bit_set.set(hash1 as usize);
54 for i in 1..k {
56 let mo = ((hash1 + i * hash2) % m) as usize;
57 res = res && bit_set.get(mo);
58 bit_set.set(mo);
59 }
60 res
61}
62
63#[inline]
64fn get_bit_indices(bit_set: &BloomBitVec, value: &[u8], m: u64, k: u64) -> Vec<u64> {
65 let mut res = Vec::<u64>::with_capacity(k as usize);
66 let hash1 = xxh3_64_with_seed(value, 0) % m;
69 let hash2 = xxh3_64_with_seed(value, 32) % m;
70 res.push(hash1);
71 for i in 1..k {
73 let mo = ((hash1 + i * hash2) % m) as usize;
74 res.push(mo as u64);
75 }
76 res
77}
78
79#[derive(Clone)]
87#[derive(Debug)]
88#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
89pub struct BloomFilter {
90 config: FilterBuilder,
91 bit_set: BloomBitVec,
92}
93
94impl Membership for BloomFilter {
95 fn add(&mut self, element: &[u8]) {
97 bit_set(&mut self.bit_set, element, self.config.size,
98 self.config.hashes as u64);
99 }
100
101 #[inline]
104 fn contains(&self, element: &[u8]) -> bool {
105 bit_check(&self.bit_set, element, self.config.size,
106 self.config.hashes as u64)
107 }
108
109 fn get_hash_indices(&self, element: &[u8]) -> Vec<u64> {
111 get_bit_indices(&self.bit_set, element, self.config.size,
112 self.config.hashes as u64)
113 }
114
115 fn contains_hash_indices(&self, indices: &Vec<u64>) -> bool {
117 for x in indices.iter() {
118 let index = *x;
119 if !self.bit_set.get(index as usize) { return false; }
120 }
121 true
122 }
123
124 fn clear(&mut self) {
126 self.bit_set.clear();
127 }
128}
129
130impl Hashes for BloomFilter {
131 fn hashes(&self) -> u32 {
133 self.config.hashes
134 }
135}
136
137impl BloomFilter {
138 pub fn new(mut config: FilterBuilder) -> Self {
149 config.complete();
150 #[cfg(target_pointer_width = "64")]
151 let bit_set = BloomBitVec::new((config.size >> 6) as usize);
152 #[cfg(target_pointer_width = "32")]
153 let bit_set = BloomBitVec::new((config.size >> 5) as usize);
154 BloomFilter { config, bit_set }
155 }
156
157 #[inline]
160 pub fn add_if_not_contains(&mut self, element: &[u8]) -> bool {
161 bit_check_and_set(&mut self.bit_set, element, self.config.size,
162 self.config.hashes as u64)
163 }
164
165 pub fn from_file_with_hashes(path: &str) -> Self {
168 let mut f = File::open(path).unwrap();
169 let len = f.metadata().unwrap().len() - 4;
170 let mut hash = [0; 4];
171 f.read_exact(&mut hash).unwrap();
172 let hashes = u32::from_be_bytes(hash);
173
174 let mut config =
175 FilterBuilder::from_size_and_hashes((len * 8) as u64, hashes);
176 config.complete();
177
178 let bit_set = BloomBitVec::from_file(&mut f, 4, len);
179
180 BloomFilter { config, bit_set }
181 }
182
183 pub fn from_file(path: &str, hashes: u32) -> Self {
185 let mut f = File::open(path).unwrap();
186 let len = f.metadata().unwrap().len();
187 let mut config =
188 FilterBuilder::from_size_and_hashes((len * 8) as u64, hashes);
189 config.complete();
190
191 let bit_set = BloomBitVec::from_file(&mut f, 0, len);
192
193 BloomFilter { config, bit_set }
194 }
195
196 pub fn from_u8_array(array: &[u8], hashes: u32) -> Self {
206 let mut config =
207 FilterBuilder::from_size_and_hashes((array.len() * 8) as u64, hashes);
208 config.complete();
209 #[cfg(target_pointer_width = "64")]
210 let mut bit_vec = BloomBitVec::new((config.size >> 6) as usize);
211 #[cfg(target_pointer_width = "32")]
212 let mut bit_vec = BloomBitVec::new((config.size >> 5) as usize);
213
214 let ptr = array.as_ptr() as *const usize;
215 #[cfg(target_pointer_width = "64")]
216 let usize_array = slice_from_raw_parts(ptr, (config.size >> 6) as usize);
217 #[cfg(target_pointer_width = "32")]
218 let usize_array = slice_from_raw_parts(ptr, (config.size >> 5) as usize);
219
220 bit_vec.storage.copy_from_slice(unsafe { &*usize_array });
221
222 BloomFilter { config, bit_set: bit_vec }
223 }
224
225 pub fn from_u16_array(array: &[u16], hashes: u32) -> Self {
235 let mut config =
236 FilterBuilder::from_size_and_hashes((array.len() * 16) as u64, hashes);
237 config.complete();
238 #[cfg(target_pointer_width = "64")]
239 let mut bit_vec = BloomBitVec::new((config.size >> 6) as usize);
240 #[cfg(target_pointer_width = "32")]
241 let mut bit_vec = BloomBitVec::new((config.size >> 5) as usize);
242
243 let ptr = array.as_ptr() as *const usize;
244 #[cfg(target_pointer_width = "64")]
245 let usize_array = slice_from_raw_parts(ptr, (config.size >> 6) as usize);
246 #[cfg(target_pointer_width = "32")]
247 let usize_array = slice_from_raw_parts(ptr, (config.size >> 5) as usize);
248
249 bit_vec.storage.copy_from_slice(unsafe { &*usize_array });
250
251 BloomFilter { config, bit_set: bit_vec }
252 }
253
254
255 pub fn from_u32_array(array: &[u32], hashes: u32) -> Self {
265 let mut config =
266 FilterBuilder::from_size_and_hashes((array.len() * 32) as u64, hashes);
267 config.complete();
268 #[cfg(target_pointer_width = "64")]
269 let mut bit_vec = BloomBitVec::new((config.size >> 6) as usize);
270 #[cfg(target_pointer_width = "32")]
271 let mut bit_vec = BloomBitVec::new((config.size >> 5) as usize);
272
273 let ptr = array.as_ptr() as *const usize;
274 #[cfg(target_pointer_width = "64")]
275 let usize_array = slice_from_raw_parts(ptr, (config.size >> 6) as usize);
276 #[cfg(target_pointer_width = "32")]
277 let usize_array = slice_from_raw_parts(ptr, (config.size >> 5) as usize);
278
279 bit_vec.storage.copy_from_slice(unsafe { &*usize_array });
280
281 BloomFilter { config, bit_set: bit_vec }
282 }
283
284 pub fn from_u64_array(array: &[u64], hashes: u32) -> Self {
294 let mut config =
295 FilterBuilder::from_size_and_hashes((array.len() * 64) as u64, hashes);
296 config.complete();
297 #[cfg(target_pointer_width = "64")]
298 let mut bit_vec = BloomBitVec::new((config.size >> 6) as usize);
299 #[cfg(target_pointer_width = "32")]
300 let mut bit_vec = BloomBitVec::new((config.size >> 5) as usize);
301
302 let ptr = array.as_ptr() as *const usize;
303 #[cfg(target_pointer_width = "64")]
304 let usize_array = slice_from_raw_parts(ptr, (config.size >> 6) as usize);
305 #[cfg(target_pointer_width = "32")]
306 let usize_array = slice_from_raw_parts(ptr, (config.size >> 5) as usize);
307
308 bit_vec.storage.copy_from_slice(unsafe { &*usize_array });
309
310 BloomFilter { config, bit_set: bit_vec }
311 }
312
313 pub fn config(&self) -> FilterBuilder {
324 self.config.clone()
325 }
326
327 pub fn save_to_file_with_hashes(&mut self, path: &str) {
330 let mut file = File::create(path).unwrap();
331 let hash = self.hashes().to_be_bytes();
332 file.write_all(&hash).unwrap();
333
334 let bytes = self.get_u8_array();
335 let mut file = OpenOptions::new().append(true).open(path).unwrap();
336 file.write_all(bytes).unwrap();
337 }
338
339 pub fn save_to_file(&mut self, path: &str) {
342 let mut file = File::create(path).unwrap();
343 let bytes = self.get_u8_array();
344 file.write_all(bytes).unwrap();
345 }
346
347 pub fn get_u8_array(&self) -> &[u8] {
349 let storage = &self.bit_set.storage;
350 let ptr = storage.as_ptr();
351 let u8_ptr = ptr as *const u8;
352 #[cfg(target_pointer_width = "64")]
353 let ptr = slice_from_raw_parts(u8_ptr, storage.len() * 8);
354 #[cfg(target_pointer_width = "32")]
355 let ptr = slice_from_raw_parts(u8_ptr, storage.len() * 4);
356 unsafe { &*ptr }
357 }
358
359 pub fn get_u16_array(&self) -> &[u16] {
361 let storage = &self.bit_set.storage;
362 let ptr = storage.as_ptr() as *const u16;
363 #[cfg(target_pointer_width = "64")]
364 let ptr = slice_from_raw_parts(ptr, storage.len() * 4);
365 #[cfg(target_pointer_width = "32")]
366 let ptr = slice_from_raw_parts(ptr, storage.len() * 2);
367 unsafe { &*ptr }
368 }
369
370 pub fn get_u32_array(&self) -> &[u32] {
372 let storage = &self.bit_set.storage;
373 let ptr = storage.as_ptr() as *const u32;
374 #[cfg(target_pointer_width = "64")]
375 let ptr = slice_from_raw_parts(ptr, storage.len() * 2);
376 #[cfg(target_pointer_width = "32")]
377 let ptr = slice_from_raw_parts(ptr, storage.len());
378 unsafe { &*ptr }
379 }
380
381 pub fn get_u64_array(&self) -> &[u64] {
383 let storage = &self.bit_set.storage;
384 let ptr = storage.as_ptr() as *const u64;
385 #[cfg(target_pointer_width = "64")]
386 let ptr = slice_from_raw_parts(ptr, storage.len());
387 if cfg!(target_pointer_width= "32") {
388 if storage.len() % 2 != 0 {
389 panic!("BloomBitVec with len {} can't export as u64 array!", storage.len())
390 }
391 }
392 #[cfg(target_pointer_width = "32")]
393 let ptr = slice_from_raw_parts(ptr, storage.len() / 2usize);
394
395 unsafe { &*ptr }
396 }
397
398
399 pub fn union(&mut self, other: &BloomFilter) -> bool {
404 if self.compatible(other) {
405 self.bit_set.or(&other.bit_set);
406 true
407 } else { false }
408 }
409
410 pub fn intersect(&mut self, other: &BloomFilter) -> bool {
416 if self.compatible(other) {
417 self.bit_set.and(&other.bit_set);
418 true
419 } else { false }
420 }
421
422 pub fn is_empty(&self) -> bool {
424 self.bit_set.is_empty()
425 }
426
427 pub fn estimate_set_cardinality(&self) -> f64 {
430 (self.bit_set.count_zeros() as f64 / self.config.size as f64).ln() / (self.hashes() as f64 * (1.0 - 1.0/self.config.size as f64).ln())
431 }
432
433 pub(crate) fn set_bit_vec(&mut self, bit_vec: BloomBitVec) {
434 assert_eq!(self.config.size, bit_vec.nbits as u64);
435 self.bit_set = bit_vec
436 }
437
438 fn compatible(&self, other: &BloomFilter) -> bool {
441 self.config.is_compatible_to(&other.config)
442 }
443}
444
445#[derive(Clone)]
453#[derive(Debug)]
454pub struct CountingBloomFilter {
455 config: FilterBuilder,
456 counting_vec: CountingVec,
457}
458
459macro_rules! get_array {
460 ($name:ident, $native:ty, $len:expr) => {
461 impl CountingBloomFilter {
462 pub fn $name(&self) -> &[$native] {
463 let ptr = self.counting_vec.storage.as_ptr() as *const $native;
464 #[cfg(target_pointer_width = "64")]
465 let arr = slice_from_raw_parts(ptr, self.counting_vec.storage.len() * $len);
466 #[cfg(target_pointer_width = "32")]
467 if cfg!(target_pointer_width= "32") {
468 if self.counting_vec.storage.len() % 2 != 0 {
469 panic!("CountingVec with len {} can't export as u64 array!", self.counting_vec.storage.len())
470 }
471 }
472 #[cfg(target_pointer_width = "32")]
473 let arr = slice_from_raw_parts(ptr, self.counting_vec.storage.len() * $len / 2);
474 unsafe { &*arr }
475 }
476 }
477 };
478}
479
480get_array!(get_u8_array, u8, 8);
481get_array!(get_u16_array, u16, 4);
482get_array!(get_u32_array, u32, 2);
483get_array!(get_u64_array, u64, 1);
484
485impl CountingBloomFilter {
486 pub fn new(mut config: FilterBuilder) -> Self {
487 config.complete();
488 #[cfg(target_pointer_width = "64")]
489 let counting_vec = CountingVec::new((config.size >> 4) as usize);
490 #[cfg(target_pointer_width = "32")]
491 let counting_vec = CountingVec::new((config.size >> 3) as usize);
492 CountingBloomFilter { config, counting_vec }
493 }
494
495 pub(crate) fn set_counting_vec(&mut self, counting_vec: CountingVec) {
496 assert_eq!(self.config.size, counting_vec.counters as u64);
497 self.counting_vec = counting_vec
498 }
499
500 fn compatible(&self, other: &BloomFilter) -> bool {
503 self.config.is_compatible_to(&other.config)
504 }
505
506 pub fn config(&self) -> FilterBuilder {
517 self.config.clone()
518 }
519}
520
521macro_rules! from_array {
522 ($name:ident, $native:ty, $num:expr) => {
523 impl CountingBloomFilter {
524 pub fn $name(array: &[$native], hashes: u32, enable_repeat_insert:bool) -> Self {
525 let mut config =
526 FilterBuilder::from_size_and_hashes((array.len() * $num) as u64, hashes);
527 config.enable_repeat_insert(enable_repeat_insert);
528 config.complete();
529 #[cfg(target_pointer_width = "64")]
530 let mut counting_vec = CountingVec::new((config.size >> 4) as usize);
531 #[cfg(target_pointer_width = "32")]
532 let mut counting_vec = CountingVec::new((config.size >> 3) as usize);
533
534 let ptr = array.as_ptr() as *const usize;
535 #[cfg(target_pointer_width = "64")]
536 let usize_array = slice_from_raw_parts(ptr, (config.size >> 4) as usize);
537 #[cfg(target_pointer_width = "32")]
538 let usize_array = slice_from_raw_parts(ptr, (config.size >> 3) as usize);
539
540 counting_vec.storage.copy_from_slice(unsafe { &*usize_array });
541
542 CountingBloomFilter { config, counting_vec }
543 }
544 }
545 };
546}
547
548from_array!(from_u8_array, u8, 2);
549from_array!(from_u16_array, u16, 4);
550from_array!(from_u32_array, u32, 8);
551from_array!(from_u64_array, u64, 16);
552
553impl CountingBloomFilter {
554 pub fn estimate_count(&self, element: &[u8]) -> usize {
557 let m = self.config.size;
558 let hash1 = xxh3_64_with_seed(element, 0) % m;
559 let hash2 = xxh3_64_with_seed(element, 32) % m;
560
561 let mut res = self.counting_vec.get(hash1 as usize);
562 if res == 0 { return 0; }
563
564 for i in 1..self.config.hashes as u64 {
565 let mo = ((hash1 + i * hash2) % m) as usize;
566 let count = self.counting_vec.get(mo);
567 if count == 0 { return 0; } else { res = min(count, res) }
568 }
569
570 res
571 }
572
573 pub fn counter_at(&self, index: u64) -> usize {
575 self.counting_vec.get(index as usize)
576 }
577}
578
579impl Membership for CountingBloomFilter {
580 fn add(&mut self, element: &[u8]) {
581 let m = self.config.size;
582 let hash1 = xxh3_64_with_seed(element, 0) % m;
585 let hash2 = xxh3_64_with_seed(element, 32) % m;
586
587 let mut res = self.counting_vec.get(hash1 as usize) > 0;
588 for i in 1..self.config.hashes as u64 {
590 let mo = ((hash1 + i * hash2) % m) as usize;
591 res = res && (self.counting_vec.get(mo) > 0);
592 }
593
594 if res && !self.config.enable_repeat_insert {
596 return;
597 }
598
599 for i in 1..self.config.hashes as u64 {
601 let mo = ((hash1 + i * hash2) % m) as usize;
602 self.counting_vec.increment(mo);
603 };
604 self.counting_vec.increment(hash1 as usize);
605 }
606
607 #[inline]
608 fn contains(&self, element: &[u8]) -> bool {
609 let m = self.config.size;
610 let hash1 = xxh3_64_with_seed(element, 0) % m;
613 let hash2 = xxh3_64_with_seed(element, 32) % m;
614
615 let mut res = self.counting_vec.get(hash1 as usize) > 0;
616 if !res { return false; }
617 for i in 1..self.config.hashes as u64 {
619 let mo = ((hash1 + i * hash2) % m) as usize;
620 res = res && (self.counting_vec.get(mo) > 0);
621 if !res { return false; }
622 }
623 res
624 }
625
626 fn get_hash_indices(&self, element: &[u8]) -> Vec<u64> {
627 let m = self.config.size;
628 let mut res = Vec::<u64>::with_capacity(self.config.size as usize);
629 let hash1 = xxh3_64_with_seed(element, 0) % m;
632 let hash2 = xxh3_64_with_seed(element, 32) % m;
633 res.push(hash1);
634 for i in 1..self.config.hashes as u64 {
636 let mo = ((hash1 + i * hash2) % m) as usize;
637 res.push(mo as u64);
638 }
639 res
640 }
641
642 fn contains_hash_indices(&self, indices: &Vec<u64>) -> bool {
643 for x in indices.iter() {
644 let index = *x;
645 if self.counting_vec.get(index as usize) == 0 { return false; }
646 }
647 true
648 }
649
650 fn clear(&mut self) {
651 self.counting_vec.clear()
652 }
653}
654
655impl Deletable for CountingBloomFilter {
656 fn remove(&mut self, element: &[u8]) {
657 let m = self.config.size;
658 let hash1 = xxh3_64_with_seed(element, 0) % m;
661 let hash2 = xxh3_64_with_seed(element, 32) % m;
662
663 let mut res = self.counting_vec.get(hash1 as usize) > 0;
664 for i in 1..self.config.hashes as u64 {
666 let mo = ((hash1 + i * hash2) % m) as usize;
667 res = res && (self.counting_vec.get(mo) > 0);
668 }
669
670 if res {
672 for i in 1..self.config.hashes as u64 {
673 let mo = ((hash1 + i * hash2) % m) as usize;
674 self.counting_vec.decrement(mo);
675 };
676 self.counting_vec.decrement(hash1 as usize);
677 }
678 }
679}
680
681impl Hashes for CountingBloomFilter {
682 fn hashes(&self) -> u32 {
683 self.config.hashes
684 }
685}
686
687#[derive(Clone)]
702#[derive(Debug)]
703pub(crate) struct PartitionedBloomFilter {}
704
705impl PartitionedBloomFilter {}
706
707#[derive(Clone)]
714#[derive(Debug)]
715pub(crate) struct ScalableBloomFilter {}
716
717impl ScalableBloomFilter {}
718
719#[derive(Clone)]
732#[derive(Debug)]
733pub(crate) struct InvertibleBloomFilter {}
734
735impl InvertibleBloomFilter {}
736
737#[derive(Clone)]
738#[derive(Debug)]
739pub(crate) struct GarbledBloomFilter {}
740
741impl GarbledBloomFilter {}
742
743
744#[test]
745fn bloom_test() {
746 let mut builder =
747 FilterBuilder::new(10_000_000, 0.01);
748 let mut bloom = builder.build_bloom_filter();
749 println!("{:?}", bloom.config);
750 bloom.add(b"hello");
751 println!("{:?}", &bloom.bit_set.storage[0..300]);
752 assert_eq!(bloom.contains(b"hello"), true);
753 assert_eq!(bloom.contains(b"world"), false);
754 assert_eq!(bloom.add_if_not_contains(b"hello2"), false);
755 assert_eq!(bloom.contains(b"hello2"), true);
756
757 let storage = &bloom.bit_set.storage[0..300];
758 println!("{:?}", storage);
759
760 #[cfg(target_pointer_width = "64")]{
761 let mut bloom2 = BloomFilter::from_u64_array(bloom.get_u64_array(), bloom.hashes());
762 assert_eq!(bloom2.compatible(&bloom), true);
763 assert_eq!(bloom2.contains(b"hello"), true);
764 assert_eq!(bloom2.contains(b"world"), false);
765 }
766
767 let mut bloom3 =
768 BloomFilter::from_u32_array(bloom.get_u32_array(), bloom.config.hashes);
769 assert_eq!(bloom3.compatible(&bloom), true);
770 assert_eq!(bloom3.contains(b"hello"), true);
771 assert_eq!(bloom3.contains(b"world"), false);
772
773 let u8_array = bloom.get_u8_array();
774 let mut bloom4 = BloomFilter::from_u8_array(u8_array, bloom.config.hashes);
775 println!("{:?}", &bloom4.bit_set.storage[0..300]);
776 assert_eq!(bloom4.compatible(&bloom), true);
777 assert_eq!(bloom4.contains(b"hello"), true);
778 assert_eq!(bloom4.contains(b"world"), false);
779
780 let bloom5 = BloomFilter::from_u16_array(bloom.get_u16_array(), bloom.hashes());
781 assert_eq!(bloom5.compatible(&bloom), true);
782 assert_eq!(bloom5.contains(b"hello"), true);
783 assert_eq!(bloom5.contains(b"world"), false);
784
785 bloom4.add(b"hello world");
786
787 assert_eq!(bloom.intersect(&bloom4), true);
788 assert_eq!(bloom.contains(b"hello"), true);
789 assert_eq!(bloom.contains(b"hello world"), false);
790
791 bloom3.add(b"hello world");
792 bloom3.add(b"hello yankun");
793
794 assert_eq!(bloom3.union(&bloom4), true);
795 assert_eq!(bloom3.contains(b"hello"), true);
796 assert_eq!(bloom3.contains(b"hello world"), true);
797 assert_eq!(bloom3.contains(b"hello yankun"), true);
798}
799
800#[test]
801fn bloom_hash_indices_test() {
802 let mut builder =
803 FilterBuilder::new(10_000, 0.01);
804 let mut bloom = builder.build_bloom_filter();
805 println!("{:?}", bloom.config);
806 bloom.add(b"hello");
807 assert_eq!(bloom.contains(b"hello"), true);
808 assert_eq!(bloom.contains(b"world"), false);
809
810 let indices = bloom.get_hash_indices(b"hello");
811 println!("{:?}", indices);
812 assert_eq!(bloom.contains_hash_indices(&indices), true);
813 assert_eq!(bloom.contains_hash_indices(&bloom.get_hash_indices(b"world")), false);
814}
815
816#[test]
817fn bloom_large() {
818 let mut builder =
819 FilterBuilder::new(1_000_000_000, 0.0001);
820 let mut bloom = builder.build_bloom_filter();
821
822 bloom.add(b"hello");
823 assert_eq!(bloom.contains(b"hello"), true);
824
825 let bloom = BloomFilter::from_u8_array(bloom.get_u8_array(), bloom.hashes());
826
827 assert_eq!(bloom.contains(b"hello"), true);
828
829}
830
831#[test]
832fn bloom_save_and_load_file_hashes() {
833 {
834 let mut builder = FilterBuilder::new(1_000_000_000, 0.0001);
835 let mut bloom = builder.build_bloom_filter();
836
837 bloom.add(b"hello");
838 assert_eq!(bloom.contains(b"hello"), true);
839 bloom.save_to_file_with_hashes("hello.bloom");
840 }
841
842
843 let bloom2 = BloomFilter::from_file_with_hashes("hello.bloom");
844 fs::remove_file("hello.bloom").unwrap();
845
846 assert_eq!(bloom2.contains(b"hello"), true);
847 assert_eq!(bloom2.contains(b"world"), false);
848
849}
850
851#[test]
852fn bloom_save_and_load_file() {
853 let mut hashes = 0;
854 {
855 let mut builder = FilterBuilder::new(1_000_000_000, 0.0001);
856 let mut bloom = builder.build_bloom_filter();
857
858 bloom.add(b"hello");
859 assert_eq!(bloom.contains(b"hello"), true);
860
861 hashes = bloom.hashes();
862
863 bloom.save_to_file("no_hashes.bloom");
864 }
865
866 let bloom2 = BloomFilter::from_file("no_hashes.bloom", hashes);
867 fs::remove_file("no_hashes.bloom").unwrap();
868
869 assert_eq!(bloom2.contains(b"hello"), true);
870 assert_eq!(bloom2.contains(b"world"), false);
871
872}
873
874
875#[test]
876fn counting_bloom_test() {
877 let mut builder =
878 FilterBuilder::new(10_000, 0.01);
879 let mut bloom = builder.build_counting_bloom_filter();
880
881 bloom.add(b"hello");
882
883 assert_eq!(bloom.contains(b"hello"), true);
884
885 bloom.remove(b"hello");
886 assert_eq!(bloom.contains(b"hello"), false);
887}
888
889#[test]
890fn counting_bloom_repeat_test() {
891 let mut builder = FilterBuilder::new(100_000, 0.01);
892 builder.enable_repeat_insert(true);
894 let mut cbf = builder.build_counting_bloom_filter();
895 cbf.add(b"hello"); cbf.add(b"hello"); assert_eq!(cbf.contains(b"hello"), true);
898 cbf.remove(b"hello");
899 assert_eq!(cbf.contains(b"hello"), true);
900 cbf.remove(b"hello");
901 assert_eq!(cbf.contains(b"hello"), false);
902
903 builder.enable_repeat_insert(false);
905 let mut cbf = builder.build_counting_bloom_filter();
906 cbf.add(b"hello"); cbf.add(b"hello"); assert_eq!(cbf.contains(b"hello"), true);
909 cbf.remove(b"hello");
910 assert_eq!(cbf.contains(b"hello"), false);
911}
912
913#[test]
914fn counting_bloom_from_test() {
915 let mut builder = FilterBuilder::new(10_000_000, 0.01);
916 let mut cbf = builder.build_counting_bloom_filter();
917
918 cbf.add(b"hello");
919 cbf.add(b"hello");
920
921 let mut cbf_copy = CountingBloomFilter::from_u8_array(cbf.get_u8_array(), builder.hashes, true);
922 assert_eq!(cbf_copy.contains(b"hello"), true);
923 cbf_copy.remove(b"hello");
924 assert_eq!(cbf_copy.contains(b"hello"), true);
925 cbf_copy.remove(b"hello");
926 assert_eq!(cbf_copy.contains(b"hello"), false);
927
928 let mut cbf_copy = CountingBloomFilter::from_u16_array(cbf.get_u16_array(), builder.hashes, true);
929 assert_eq!(cbf_copy.contains(b"hello"), true);
930 cbf_copy.remove(b"hello");
931 assert_eq!(cbf_copy.contains(b"hello"), true);
932 cbf_copy.remove(b"hello");
933 assert_eq!(cbf_copy.contains(b"hello"), false);
934
935 let mut cbf_copy = CountingBloomFilter::from_u32_array(cbf.get_u32_array(), builder.hashes, true);
936 assert_eq!(cbf_copy.contains(b"hello"), true);
937 cbf_copy.remove(b"hello");
938 assert_eq!(cbf_copy.contains(b"hello"), true);
939 cbf_copy.remove(b"hello");
940 assert_eq!(cbf_copy.contains(b"hello"), false);
941
942 #[cfg(target_pointer_width = "64")]{
943 let mut cbf_copy = CountingBloomFilter::from_u64_array(cbf.get_u64_array(), builder.hashes, true);
944 assert_eq!(cbf_copy.contains(b"hello"), true);
945 cbf_copy.remove(b"hello");
946 assert_eq!(cbf_copy.contains(b"hello"), true);
947 cbf_copy.remove(b"hello");
948 assert_eq!(cbf_copy.contains(b"hello"), false);
949 }
950}
951
952#[test]
953fn counting_bloom_hash_indices_test() {
954 let mut builder =
955 FilterBuilder::new(10_000, 0.01);
956 let mut bloom = builder.build_counting_bloom_filter();
957
958 bloom.add(b"hello");
959
960 assert_eq!(bloom.contains(b"hello"), true);
961 assert_eq!(bloom.contains_hash_indices(&bloom.get_hash_indices(b"hello")), true);
962 assert_eq!(bloom.contains_hash_indices(&bloom.get_hash_indices(b"world")), false);
963
964
965 bloom.remove(b"hello");
966 assert_eq!(bloom.contains(b"hello"), false);
967 assert_eq!(bloom.contains_hash_indices(&bloom.get_hash_indices(b"hello")), false);
968}
969
970#[test]
971fn counting_bloom_estimate_count() {
972 let mut builder =
973 FilterBuilder::new(10_000, 0.01);
974 let mut bloom = builder.build_counting_bloom_filter();
975
976 bloom.add(b"hello");
977 bloom.add(b"world");
978
979 assert_eq!(bloom.estimate_count(b"hello"), 1);
980 let indices = bloom.get_hash_indices(b"hello");
981
982 for index in indices {
983 assert_eq!(bloom.counter_at(index), 1)
984 }
985
986 assert_eq!(bloom.estimate_count(b"world"), 1);
987 for index in bloom.get_hash_indices(b"world") {
988 assert!(bloom.counter_at(index) <= 2);
989 }
990}