1#![allow(rustdoc::bare_urls)]
2#![warn(unreachable_pub)]
3#![doc = include_str!("../README.md")]
4#![cfg_attr(not(feature = "std"), no_std)]
5
6extern crate alloc;
7use alloc::vec::Vec;
8use core::hash::{BuildHasher, Hash, Hasher};
9use core::iter::repeat;
10mod hasher;
11pub use hasher::DefaultHasher;
12use hasher::DoubleHasher;
13mod builder;
14pub use builder::{
15 expected_density, expected_false_pos, optimal_hashes, optimal_size, AtomicBuilderWithBits,
16 AtomicBuilderWithFalsePositiveRate, BuilderWithBits, BuilderWithFalsePositiveRate,
17};
18mod bit_vector;
19use bit_vector::{AtomicBitVec, BitVec};
20mod math;
21
22#[cfg(feature = "loom")]
23pub(crate) use loom::sync::atomic::AtomicU64;
24
25#[cfg(not(feature = "loom"))]
26pub(crate) use portable_atomic::AtomicU64;
27
28#[cfg(all(feature = "loom", feature = "serde"))]
29compile_error!("features `loom` and `serde` are mutually exclusive");
30
31macro_rules! impl_bloom {
32 ($name:ident, $builder_bits:ident, $builder_fp:ident, $bitvec:ident, $bits:ty, $ismut:literal, $($m:ident)?) => {
33 #[doc = concat!("use fastbloom::", stringify!($name), ";")]
48 #[doc = concat!("let ", $ismut, "filter = ", stringify!($name), "::with_num_bits(1024).expected_items(2);")]
50 #[doc = concat!("use fastbloom::", stringify!($name), ";")]
56 #[doc = concat!("let filter = ", stringify!($name), "::with_false_pos(0.001).items([\"42\", \"🦀\"].iter());")]
58 #[doc = concat!("use fastbloom::", stringify!($name), ";")]
64 #[doc = concat!("let filter = ", stringify!($name), "::with_num_bits(1024)")]
67 #[derive(Debug, Clone)]
71 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
72 pub struct $name<S = DefaultHasher> {
73 bits: $bitvec,
74 num_hashes_minus_one: u32,
75 hasher: S,
76 }
77
78 impl $name {
79 fn new_builder(num_bits: usize) -> $builder_bits {
80 assert!(num_bits > 0);
81 let num_u64s = (num_bits + 64 - 1) / 64;
84 $builder_bits {
85 data: repeat(0).take(num_u64s).collect(),
86 hasher: Default::default(),
87 }
88 }
89
90 fn new_from_vec(vec: Vec<u64>) -> $builder_bits {
91 assert!(!vec.is_empty());
92 $builder_bits {
93 data: vec,
94 hasher: Default::default(),
95 }
96 }
97
98 fn new_with_false_pos(fp: f64) -> $builder_fp {
99 assert!(fp > 0.0);
100 $builder_fp {
101 desired_fp_rate: fp,
102 hasher: Default::default(),
103 }
104 }
105
106 #[doc = concat!("use fastbloom::", stringify!($name), ";")]
114 #[doc = concat!("let filter = ", stringify!($name), "::with_false_pos(0.001).expected_items(1000);")]
115 pub fn with_false_pos(fp: f64) -> $builder_fp {
117 $name::new_with_false_pos(fp)
118 }
119
120 #[doc = concat!("use fastbloom::", stringify!($name), ";")]
127 #[doc = concat!("let filter = ", stringify!($name), "::with_num_bits(1024).hashes(4);")]
128 pub fn with_num_bits(num_bits: usize) -> $builder_bits {
130 $name::new_builder(num_bits)
131 }
132
133 #[doc = concat!("use fastbloom::", stringify!($name), ";")]
140 #[doc = concat!("let orig = ", stringify!($name), "::with_false_pos(0.001).seed(&42).items([1, 2].iter());")]
142 #[doc = concat!("let new = ", stringify!($name), "::from_vec(orig.iter().collect()).seed(&42).hashes(num_hashes);")]
144 pub fn from_vec(bit_vec: Vec<u64>) -> $builder_bits {
149 $name::new_from_vec(bit_vec)
150 }
151 }
152
153 impl<S: BuildHasher> $name<S> {
154 #[doc = concat!("use fastbloom::", stringify!($name), ";")]
164 #[doc = concat!("let bloom = ", stringify!($name), "::with_num_bits(1024).items([1, 2, 3].iter());")]
166 #[inline]
169 pub fn contains(&self, val: &(impl Hash + ?Sized)) -> bool {
170 self.contains_hash(self.source_hash(val))
171 }
172
173 #[inline]
180 pub fn contains_hash(&self, hash: u64) -> bool {
181 match self.bits.check(index(self.num_bits(), hash)) {
182 false => false,
183 true => {
184 let mut hasher = DoubleHasher::new(hash);
185 (0..self.num_hashes_minus_one).all(|_| {
186 let h = hasher.next();
187 self.bits.check(index(self.num_bits(), h))
188 })
189 }
190 }
191 }
192
193 #[inline]
195 pub fn num_hashes(&self) -> u32 {
196 self.num_hashes_minus_one + 1
197 }
198
199 pub fn num_bits(&self) -> usize {
201 self.bits.num_bits()
202 }
203
204 #[inline]
206 pub fn iter(&self) -> impl Iterator<Item = u64> + '_ {
207 self.bits.iter()
208 }
209
210 #[inline]
212 pub fn as_slice(&self) -> &[$bits] {
213 self.bits.as_slice()
214 }
215
216 #[inline]
221 pub fn source_hash(&self, val: &(impl Hash + ?Sized)) -> u64 {
222 let mut state = self.hasher.build_hasher();
223 val.hash(&mut state);
224 state.finish()
225 }
226
227 pub fn expected_false_pos(&self, num_items: usize) -> f64 {
229 let density = crate::expected_density(self.num_hashes(), self.num_bits(), num_items);
230 crate::expected_false_pos(self.num_hashes(), density)
231 }
232
233 #[doc = concat!("use fastbloom::", stringify!($name), ";")]
243 #[doc = concat!("let ", $ismut, "bloom = ", stringify!($name), "::with_num_bits(1024).hashes(4);")]
245 #[inline]
249 pub fn insert(&$($m)? self, val: &(impl Hash + ?Sized)) -> bool {
250 self.insert_hash(self.source_hash(val))
251 }
252
253 #[inline]
261 pub fn insert_hash(&$($m)? self, hash: u64) -> bool {
262 let mut previously_contained = true;
263 previously_contained &= self.bits.set(index(self.num_bits(), hash));
264 let mut hasher = DoubleHasher::new(hash);
265 for _ in 0..self.num_hashes_minus_one {
266 let h = hasher.next();
267 previously_contained &= self.bits.set(index(self.num_bits(), h));
268 }
269 previously_contained
270 }
271
272 #[inline]
274 pub fn insert_all<'a, T: Hash + 'a, I: IntoIterator<Item = &'a T>>(&$($m)? self, iter: I) {
275 for val in iter {
276 self.insert(val);
277 }
278 }
279
280 #[inline]
282 pub fn clear(&$($m)? self) {
283 self.bits.clear();
284 }
285
286 #[doc = concat!("use fastbloom::", stringify!($name), ";")]
294 #[doc = concat!("let ", $ismut, "bloom = ", stringify!($name), "::with_num_bits(4096).seed(&1).hashes(4);")]
296 #[doc = concat!("let ", $ismut, "other = ", stringify!($name), "::with_num_bits(4096).seed(&1).hashes(4);")]
297 #[inline]
310 pub fn union(&$($m)? self, other: &Self) {
311 assert_eq!(
312 self.num_hashes(),
313 other.num_hashes(),
314 "expected same number of hashes"
315 );
316 self.bits.union(&other.bits);
317 }
318
319 #[doc = concat!("use fastbloom::", stringify!($name), ";")]
327 #[doc = concat!("let ", $ismut, "bloom = ", stringify!($name), "::with_num_bits(4096).seed(&1).hashes(4);")]
329 #[doc = concat!("let ", $ismut, "other = ", stringify!($name), "::with_num_bits(4096).seed(&1).hashes(4);")]
330 #[inline]
343 pub fn intersect(&$($m)? self, other: &Self) {
344 assert_eq!(
345 self.num_hashes(),
346 other.num_hashes(),
347 "expected same number of hashes"
348 );
349 self.bits.intersect(&other.bits);
350 }
351 }
352
353 impl<T, S: BuildHasher> Extend<T> for $name<S>
354 where
355 T: Hash,
356 {
357 #[inline]
358 fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) {
359 for val in iter {
360 self.insert(&val);
361 }
362 }
363 }
364
365 impl<S: BuildHasher> PartialEq for $name<S> {
366 fn eq(&self, other: &Self) -> bool {
367 self.bits == other.bits && self.num_hashes() == other.num_hashes()
368 }
369 }
370 impl<S: BuildHasher> Eq for $name<S> {}
371 };
372}
373
374impl_bloom!(
375 BloomFilter,
376 BuilderWithBits,
377 BuilderWithFalsePositiveRate,
378 BitVec,
379 u64,
380 "mut ",
381 mut
382);
383impl_bloom!(
384 AtomicBloomFilter,
385 AtomicBuilderWithBits,
386 AtomicBuilderWithFalsePositiveRate,
387 AtomicBitVec,
388 AtomicU64,
389 "",
390);
391
392#[inline]
397pub(crate) fn index(num_bits: usize, hash: u64) -> usize {
398 ((hash as u128 * num_bits as u128) >> 64) as usize
399}
400
401macro_rules! impl_tests {
402 ($modname:ident, $name:ident) => {
403 #[allow(unused_mut)]
404 #[cfg(not(feature = "loom"))]
405 #[cfg(test)]
406 mod $modname {
407 use super::*;
408 use alloc::format;
409
410 trait Seeded: BuildHasher {
411 fn seeded(seed: &[u8; 16]) -> Self;
412 }
413 impl Seeded for DefaultHasher {
414 fn seeded(seed: &[u8; 16]) -> Self {
415 Self::seeded(seed)
416 }
417 }
418
419 const TRIALS: usize = 20_000_000;
420
421 fn false_pos_rate<H: BuildHasher>(filter: &$name<H>) -> f64 {
422 let mut total = 0;
423 let mut false_positives = 0;
424 for x in non_member_nums() {
425 total += 1;
426 false_positives += filter.contains_hash(x) as usize;
427 }
428 (false_positives as f64) / (total as f64)
429 }
430
431 fn member_nums(num: usize) -> impl Iterator<Item = u64> {
432 random_numbers(num, 5)
433 }
434
435 fn non_member_nums() -> impl Iterator<Item = u64> {
436 random_numbers(TRIALS, 7)
437 }
438
439 fn random_numbers(num: usize, seed: u64) -> impl Iterator<Item = u64> {
440 let mut rng = fastrand::Rng::with_seed(seed);
441 (0..=num).map(move |_| rng.u64(..))
442 }
443
444 #[test]
445 fn test_to_from_vec() {
446 fn to_from_(size: usize) {
447 let mut b = $name::new_builder(size).seed(&1).hashes(3);
448 b.extend(member_nums(1000));
449 let b2 = $name::new_from_vec(b.iter().collect())
450 .seed(&1)
451 .hashes(3);
452 assert_eq!(b, b2);
453 assert_eq!(b.num_bits(), b.bits.len() * 64);
454 assert!(size <= b.bits.len() * 64);
455 assert!((size + u64::BITS as usize) > b.bits.len() * 64);
456 }
457 for size in 1..=10009 {
458 to_from_(size);
459 }
460 }
461
462 #[test]
463 fn first_insert_false() {
464 let mut filter = $name::with_num_bits(1202).expected_items(4);
465 assert!(!filter.insert(&5));
466 }
467
468 #[test]
469 fn target_fp_is_accurate_actual() {
470 target_fp_is_accurate(1..=8, 3..=6, |bloom: &mut $name, num_items: usize| {
471 for x in member_nums(num_items) {
472 bloom.insert_hash(x);
473 }
474 false_pos_rate(&bloom)
475 });
476 }
477
478 #[test]
479 fn target_fp_is_accurate_expected() {
480 target_fp_is_accurate(1..=8, 2..=7, |bloom: &mut $name, num_items: usize| {
481 bloom.expected_false_pos(num_items)
482 });
483 }
484
485 fn target_fp_is_accurate(
486 target_fp_range: core::ops::RangeInclusive<u32>,
487 num_items_range: core::ops::RangeInclusive<u32>,
488 measure_fp: fn(&mut $name, usize) -> f64,
489 ) {
490 let thresh = 1.0f64;
493
494 for fp_mag in target_fp_range {
496 let fp = 1.0f64 / 10u64.pow(fp_mag) as f64;
497
498 for num_items_mag in num_items_range.clone() {
500 let num_items = 10usize.pow(num_items_mag);
501 let allocated_bytes = crate::optimal_size(num_items, fp) >> 3;
502
503 assert!(allocated_bytes < 64_000_000, "About to allocate {} bytes", allocated_bytes);
505
506 let mut filter = $name::new_with_false_pos(fp)
507 .seed(&42)
508 .expected_items(num_items);
509 let sample_fp = measure_fp(&mut filter, num_items);
510 let err = (sample_fp - fp) / fp;
511 let size_bits = filter.num_bits();
512 assert!(sample_fp < fp || err < thresh, "err {err:}, thresh {thresh:}, num_items: {num_items:}, size bits: {size_bits:}, fp: {fp:}, sample fp: {sample_fp:}");
513 }
514 }
515 }
516
517 #[test]
518 fn nothing_after_clear() {
519 for mag in 1..6 {
520 let size = 10usize.pow(mag);
521 for bloom_size_mag in 6..10 {
522 let num_blocks_bytes = 1 << bloom_size_mag;
523 let num_bits = num_blocks_bytes * 8;
524 let mut filter = $name::new_builder(num_bits)
525 .seed(&7)
526 .expected_items(size);
527 filter.extend(member_nums(size));
528 assert!(filter.num_hashes() > 0);
529 filter.clear();
530 assert!(member_nums(size).all(|x| !filter.contains(&x)));
531 }
532 }
533 }
534
535 #[test]
536 fn random_inserts_always_contained() {
537 for mag in 1..6 {
538 let size = 10usize.pow(mag);
539 for bloom_size_mag in 6..10 {
540 let num_blocks_bytes = 1 << bloom_size_mag;
541 let num_bits = num_blocks_bytes * 8;
542 let mut filter = $name::new_builder(num_bits).expected_items(size);
543 filter.extend(member_nums(size));
544 assert!(member_nums(size).all(|x| filter.contains(&x)));
545 assert!(member_nums(size).all(|x| filter.insert(&x)));
546 }
547 }
548 }
549
550 #[test]
551 fn test_optimal_hashes_is_optimal() {
552 fn test_optimal_hashes_is_optimal_<H: Seeded>() {
553 let sizes = [1000, 2000, 5000, 6000, 8000, 10000];
554 for num_items in sizes {
555 let num_bits = 65000 * 8;
556 let mut filter = $name::new_builder(num_bits)
557 .hasher(H::seeded(&[42; 16]))
558 .expected_items(num_items);
559 filter.extend(member_nums(num_items));
560
561 let fp_to_beat = false_pos_rate(&filter);
562 let optimal_hashes = filter.num_hashes();
563
564 for num_hashes in [optimal_hashes - 1, optimal_hashes + 1] {
565 let mut test_filter = $name::new_builder(num_bits)
566 .hasher(H::seeded(&[42; 16]))
567 .hashes(num_hashes);
568 test_filter.extend(member_nums(num_items));
569 let fp = false_pos_rate(&test_filter);
570 assert!(fp_to_beat <= fp);
571 }
572 }
573 }
574 test_optimal_hashes_is_optimal_::<DefaultHasher>();
575 }
576
577 #[test]
578 fn seeded_is_same() {
579 let num_bits = 1 << 10;
580 let sample_vals = member_nums(1000).collect::<Vec<_>>();
581 for x in 0u8..32 {
582 let seed = x as u128;
583 assert_eq!(
584 $name::new_builder(num_bits)
585 .seed(&seed)
586 .items(sample_vals.iter()),
587 $name::new_builder(num_bits)
588 .seed(&seed)
589 .items(sample_vals.iter())
590 );
591 assert!(
592 !($name::new_builder(num_bits)
593 .seed(&(seed + 1))
594 .items(sample_vals.iter())
595 == $name::new_builder(num_bits)
596 .seed(&seed)
597 .items(sample_vals.iter()))
598 );
599 }
600 }
601
602 #[test]
603 fn false_pos_decrease_with_size() {
604 for mag in 5..6 {
605 let size = 10usize.pow(mag);
606 let mut prev_fp = 1.0;
607 for num_bits_mag in 9..22 {
608 let num_bits = 1 << num_bits_mag;
609
610 let mut filter = $name::new_builder(num_bits).expected_items(size);
611 for x in member_nums(size) {
612 filter.insert_hash(x);
613 }
614
615 let fp = false_pos_rate(&filter);
616
617 let err = format!(
618 "size: {size:}, num_bits: {num_bits:}, {:.6}, {:?}",
619 fp,
620 filter.num_hashes(),
621 );
622 assert!(
623 fp <= prev_fp,
624 "{}",
625 err
626 );
627 prev_fp = fp;
628 }
629 }
630 }
631
632 fn assert_even_distribution(distr: &[u64], err: f64) {
633 assert!(err > 0.0 && err < 1.0);
634 let expected: i64 = (distr.iter().sum::<u64>() / (distr.len() as u64)) as i64;
635 let thresh = (expected as f64 * err) as i64;
636 for x in distr {
637 let diff = (*x as i64 - expected).abs();
638 assert!(
639 diff <= thresh,
640 "{x:?} deviates from {expected:?}\nDistribution: {distr:?}"
641 );
642 }
643 }
644
645 #[test]
646 fn test_seeded_hash_from_hashes_depth() {
647 for size in [1, 10, 100, 1000] {
648 let mut rng = fastrand::Rng::with_seed(524323);
649 let mut hasher = DoubleHasher::new(rng.u64(..));
650 let mut seeded_hash_counts: Vec<_> = repeat(0).take(size).collect();
651 for _ in 0..(size * 10_000) {
652 let hi = hasher.next();
653 seeded_hash_counts[(hi as usize) % size] += 1;
654 }
655 assert_even_distribution(&seeded_hash_counts, 0.05);
656 }
657 }
658
659 #[test]
660 fn test_debug() {
661 let filter = $name::with_num_bits(1).hashes(1);
662 assert!(!format!("{:?}", filter).is_empty());
663 }
664
665 #[test]
666 fn test_clone() {
667 let filter = $name::with_num_bits(4).hashes(4);
668 let mut cloned = filter.clone();
669 assert_eq!(filter, cloned);
670 cloned.insert(&42);
671 assert!(filter != cloned);
672 }
673
674 #[test]
675 fn eq_constructors_num_bits() {
676 assert_eq!(
677 $name::with_num_bits(4).hashes(4),
678 $name::new_builder(4).hashes(4),
679 );
680 }
681
682 #[test]
683 fn eq_constructors_false_pos() {
684 assert_eq!(
685 $name::with_false_pos(0.4),
686 $name::new_with_false_pos(0.4),
687 );
688 }
689
690 #[test]
691 fn eq_constructors_from_vec() {
692 assert_eq!(
693 $name::from_vec(repeat(42).take(42).collect()),
694 $name::new_from_vec(repeat(42).take(42).collect()),
695 );
696 }
697
698 #[test]
699 fn test_rebuilt_from_vec() {
700 for num in [1, 10, 1000, 100_000] {
701 for fp in [0.1, 0.01, 0.0001, 0.0000001] {
702 let mut b = $name::with_false_pos(fp)
703 .seed(&42)
704 .expected_items(num);
705 b.extend(member_nums(num));
706 let orig_hashes = b.num_hashes();
707 let new = $name::from_vec(b.iter().collect())
708 .seed(&42)
709 .hashes(orig_hashes);
710 assert!(member_nums(num).all(|x| new.contains(&x)));
711 }
712 }
713 }
714
715 #[cfg(feature = "serde")]
716 #[test]
717 fn test_serde() {
718 for num in [1, 10, 1000, 100_000] {
719 for fp in [0.1, 0.01, 0.0001, 0.0000001] {
720 let mut before = $name::with_false_pos(fp)
721 .seed(&42)
722 .expected_items(num);
723 before.extend(member_nums(num));
724
725 let s = serde_cbor::to_vec(&before).unwrap();
726 let mut after: $name = serde_cbor::from_slice(&s).unwrap();
727 assert_eq!(before, after);
728
729 before.extend(member_nums(num * 2));
730 after.extend(member_nums(num * 2));
731 assert_eq!(before, after);
732 }
733 }
734 }
735 }
736 };
737}
738
739impl_tests!(non_atomic, BloomFilter);
740impl_tests!(atomic, AtomicBloomFilter);
741
742#[cfg(not(feature = "loom"))]
743#[cfg(test)]
744mod atomic_parity_tests {
745 #[cfg(feature = "serde")]
746 #[test]
747 fn serde_parity() {
748 use super::*;
749
750 for num_bits in [64, 1024, 4096, 1 << 16] {
751 for seed in 4..=18 {
752 let mut non = BloomFilter::with_num_bits(num_bits)
753 .seed(&seed)
754 .expected_items(100);
755 non.extend(0..=100);
756 let mut atomic = AtomicBloomFilter::with_num_bits(num_bits)
757 .seed(&seed)
758 .expected_items(100);
759 atomic.extend(0..=100);
760
761 let non_bytes = serde_cbor::to_vec(&non).unwrap();
762 let atomic_bytes = serde_cbor::to_vec(&atomic).unwrap();
763 assert_eq!(non_bytes, atomic_bytes);
764
765 let non_from_atomic: BloomFilter = serde_cbor::from_slice(&atomic_bytes).unwrap();
766 let atomic_from_non: AtomicBloomFilter =
767 serde_cbor::from_slice(&non_bytes).unwrap();
768 assert_eq!(non_from_atomic, non);
769 assert_eq!(atomic_from_non, atomic);
770 }
771 }
772 }
773}
774
775#[cfg(feature = "loom")]
776#[cfg(test)]
777mod loom_tests {
778 use super::*;
779
780 #[test]
781 fn test_loom() {
782 loom::model(|| {
783 let b = loom::sync::Arc::new(AtomicBloomFilter::with_num_bits(128).seed(&42).hashes(2));
784 let expected = AtomicBloomFilter::with_num_bits(128).seed(&42).hashes(2);
785 for x in 1..=3 {
786 expected.insert(&x);
787 }
788
789 let handles: Vec<_> = [(1..=2), (2..=3)]
790 .into_iter()
791 .map(|data| {
792 let v = b.clone();
793 loom::thread::spawn(move || {
794 for x in data {
795 v.insert(&x);
796 }
797 })
798 })
799 .collect();
800
801 for handle in handles {
802 handle.join().unwrap();
803 }
804
805 let res = b.iter().collect::<Vec<_>>();
806 assert_eq!(res, expected.iter().collect::<Vec<_>>());
807 });
808 }
809}