use crate::{math::*, AtomicBloomFilter, BloomFilter, BuildHasher, DefaultHasher};
use alloc::vec::Vec;
use core::{cmp::max, f64::consts::LN_2, hash::Hash};
macro_rules! builder_with_bits {
($name:ident, $($m:ident)?, $bloom:ident) => {
#[doc = concat!("This type can be used to construct an instance of [`", stringify!($bloom), "`] via the builder pattern.")]
#[doc = concat!("use fastbloom::", stringify!($bloom), ";")]
#[doc = concat!("let builder = ", stringify!($bloom), "::with_num_bits(1024);")]
#[doc = concat!("let builder = ", stringify!($bloom), "::from_vec(vec![0; 8]);")]
#[derive(Debug, Clone)]
pub struct $name<S = DefaultHasher> {
pub(crate) data: Vec<u64>,
pub(crate) hasher: S,
}
impl<S: BuildHasher> PartialEq for $name<S> {
fn eq(&self, other: &Self) -> bool {
self.data == other.data
}
}
impl<S: BuildHasher> Eq for $name<S> {}
impl $name {
#[doc = concat!("use fastbloom::", stringify!($bloom), ";")]
#[doc = concat!("let bloom = ", stringify!($bloom), "::with_num_bits(1024).seed(&1).hashes(4);")]
pub fn seed(mut self, seed: &u128) -> Self {
self.hasher = DefaultHasher::seeded(&seed.to_be_bytes());
self
}
}
impl<S: BuildHasher> $name<S> {
#[doc = concat!("use fastbloom::", stringify!($bloom), ";")]
#[doc = concat!("let bloom = ", stringify!($bloom), "::with_num_bits(1024).hasher(RandomState::default()).hashes(4);")]
pub fn hasher<H: BuildHasher>(self, hasher: H) -> $name<H> {
$name::<H> {
data: self.data,
hasher,
}
}
#[doc = concat!("empty [`", stringify!($bloom), "`].")]
#[doc = concat!("use fastbloom::", stringify!($bloom), ";")]
#[doc = concat!("let bloom = ", stringify!($bloom), "::with_num_bits(1024).hashes(4);")]
pub fn hashes(self, num_hashes: u32) -> $bloom<S> {
$bloom {
bits: self.data.into_iter().collect(),
num_hashes_minus_one: max(1, num_hashes) - 1,
hasher: self.hasher,
}
}
#[doc = concat!("empty [`", stringify!($bloom), "`]. The number of hashes is optimized based on `expected_items`")]
#[doc = concat!("to maximize Bloom filter accuracy (minimize false positives chance on [`", stringify!($bloom), "::contains`]).")]
#[doc = concat!("use fastbloom::", stringify!($bloom), ";")]
#[doc = concat!("let bloom = ", stringify!($bloom), "::with_num_bits(1024).expected_items(500);")]
pub fn expected_items(self, expected_items: usize) -> $bloom<S> {
let expected_items = max(1, expected_items);
let hashes = optimal_hashes(self.data.len() * 64, expected_items);
self.hashes(hashes)
}
#[doc = concat!("\"Consumes\" this builder and constructs a [`", stringify!($bloom), "`] containing")]
#[doc = concat!("(minimize false positives chance on [`", stringify!($bloom), "::contains`]).")]
#[doc = concat!("use fastbloom::", stringify!($bloom), ";")]
#[doc = concat!("let bloom = ", stringify!($bloom), "::with_num_bits(1024).items([1, 2, 3].iter());")]
pub fn items<'a, H: Hash + 'a, I: IntoIterator<IntoIter = impl ExactSizeIterator<Item = &'a H>>>(
self,
items: I,
) -> $bloom<S> {
let into_iter = items.into_iter();
let $($m)? filter = self.expected_items(into_iter.len());
filter.insert_all(into_iter);
filter
}
}
};
}
builder_with_bits!(BuilderWithBits, mut, BloomFilter);
builder_with_bits!(AtomicBuilderWithBits, , AtomicBloomFilter);
macro_rules! builder_with_fp {
($name:ident, $($m:ident)?, $bloom:ident) => {
#[doc = concat!("use fastbloom::", stringify!($bloom), ";")]
#[doc = concat!("let builder = ", stringify!($bloom), "::with_false_pos(0.01);")]
#[derive(Debug, Clone)]
pub struct $name<S = DefaultHasher> {
pub(crate) desired_fp_rate: f64,
pub(crate) hasher: S,
}
impl<S: BuildHasher> PartialEq for $name<S> {
fn eq(&self, other: &Self) -> bool {
self.desired_fp_rate == other.desired_fp_rate
}
}
impl<S: BuildHasher> Eq for $name<S> {}
impl $name {
#[doc = concat!("use fastbloom::", stringify!($bloom), ";")]
#[doc = concat!("let bloom = ", stringify!($bloom), "::with_false_pos(0.001).seed(&1).expected_items(100);")]
pub fn seed(mut self, seed: &u128) -> Self {
self.hasher = DefaultHasher::seeded(&seed.to_be_bytes());
self
}
}
impl<S: BuildHasher> $name<S> {
#[doc = concat!("Sets the hasher for this builder. The later constructed [`", stringify!($bloom), "`] will use")]
#[doc = concat!("use fastbloom::", stringify!($bloom), ";")]
#[doc = concat!("let bloom = ", stringify!($bloom), "::with_false_pos(0.001).hasher(RandomState::default()).expected_items(100);")]
pub fn hasher<H: BuildHasher>(self, hasher: H) -> $name<H> {
$name::<H> {
desired_fp_rate: self.desired_fp_rate,
hasher,
}
}
#[doc = concat!("empty [`", stringify!($bloom), "`]. The number of hashes is optimized based on `expected_items`")]
#[doc = concat!("to maximize Bloom filter accuracy (minimize false positives chance on [`", stringify!($bloom), "::contains`]).")]
#[doc = concat!("use fastbloom::", stringify!($bloom), ";")]
#[doc = concat!("let bloom = ", stringify!($bloom), "::with_false_pos(0.001).expected_items(500);")]
pub fn expected_items(self, expected_items: usize) -> $bloom<S> {
let expected_items = max(1, expected_items);
let num_bits = optimal_size(expected_items, self.desired_fp_rate);
$bloom::new_builder(num_bits)
.hasher(self.hasher)
.expected_items(expected_items)
}
#[doc = concat!("\"Consumes\" this builder and constructs a [`", stringify!($bloom), "`] containing")]
#[doc = concat!("use fastbloom::", stringify!($bloom), ";")]
#[doc = concat!("let bloom = ", stringify!($bloom), "::with_false_pos(0.001).items([1, 2, 3].iter());")]
pub fn items<'a, H: Hash + 'a, I: IntoIterator<IntoIter = impl ExactSizeIterator<Item = &'a H>>>(
self,
items: I,
) -> $bloom<S> {
let into_iter = items.into_iter();
let $($m)? filter = self.expected_items(into_iter.len());
filter.insert_all(into_iter);
filter
}
}
};
}
builder_with_fp!(BuilderWithFalsePositiveRate, mut, BloomFilter);
builder_with_fp!(AtomicBuilderWithFalsePositiveRate, , AtomicBloomFilter);
pub fn optimal_hashes(num_bits: usize, num_items: usize) -> u32 {
let num_bits = num_bits as f64;
let hashes = LN_2 * num_bits / num_items as f64;
max(round(hashes) as u32, 1)
}
pub fn optimal_size(num_items: usize, fp: f64) -> usize {
let num_items = num_items as f64;
let log2_2 = LN_2 * LN_2;
let result = 8 * ceil(num_items * ln(fp) / (-8.0 * log2_2)) as usize;
max(result, 64)
}
pub fn expected_density(hashes: u32, bits: usize, items: usize) -> f64 {
let total_sets = (items * hashes as usize) as f64;
let bits = bits as f64;
let prob_set = 1.0 / bits;
let prob_not_set = 1.0 - prob_set;
let prob_all_not_set = crate::math::pow(prob_not_set, total_sets);
1.0 - prob_all_not_set
}
pub fn expected_false_pos(hashes: u32, density: f64) -> f64 {
crate::math::pow(density, hashes as f64)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_expected_false_pos() {
for items_mag in 1..=32 {
let items = 2usize.pow(items_mag);
for fp_mag in 1..=16 {
let target_fp = 1.0f64 / 10u64.pow(fp_mag) as f64;
let size = optimal_size(items, target_fp);
let thresh = if size < 256 {
0.1 } else {
0.01
};
let h = optimal_hashes(size, items);
let density = expected_density(h, size, items);
let expected_fp = expected_false_pos(h, density);
let err = (expected_fp - target_fp) / target_fp;
assert!(err < thresh);
}
}
}
fn density_err(d: f64) -> f64 {
(0.5 - d).abs()
}
#[test]
fn test_optimal_hashes() {
for bits_mag in 6..=16 {
let bits = 2usize.pow(bits_mag);
for items_mag in 1..=16 {
let items = 2usize.pow(items_mag);
let h = optimal_hashes(bits, items);
if h > 1000 {
continue;
}
let density = expected_density(h, bits, items);
assert!(density_err(density) <= density_err(expected_density(h + 1, bits, items)));
assert!(density_err(density) <= density_err(expected_density(h - 1, bits, items)));
}
}
}
}
#[cfg(test)]
mod for_accuracy_tests {
use crate::BloomFilter;
#[test]
fn data_size() {
let size_bits = 512 * 1000;
let bloom = BloomFilter::with_num_bits(size_bits).hashes(4);
assert_eq!(bloom.num_bits(), size_bits);
}
#[test]
fn specified_hashes() {
for num_hashes in 1..1000 {
assert_eq!(
num_hashes,
BloomFilter::with_num_bits(1)
.hashes(num_hashes)
.num_hashes()
);
assert_eq!(
num_hashes,
BloomFilter::with_num_bits(1)
.hashes(num_hashes)
.num_hashes()
);
}
}
}
#[cfg(test)]
mod for_size_tests {
use crate::{AtomicBloomFilter, BloomFilter};
#[test]
fn test_size() {
let _: BloomFilter = BloomFilter::new_with_false_pos(0.0001).expected_items(10000);
}
#[test]
fn test_zero_hashes() {
let bloom = BloomFilter::with_num_bits(512).hashes(0);
assert_eq!(bloom.num_hashes(), 1);
let bloom = AtomicBloomFilter::with_num_bits(512).hashes(0);
assert_eq!(bloom.num_hashes(), 1);
}
}