use super::BloomFilter;
use crate::codec::family::Family;
use crate::hash::DEFAULT_UPDATE_SEED;
#[derive(Debug, Clone)]
pub struct BloomFilterBuilder {
num_bits: u64,
num_hashes: u16,
seed: u64,
}
impl BloomFilterBuilder {
pub const MIN_NUM_BITS: u64 = 1;
pub const MAX_NUM_BITS: u64 = (i32::MAX as u64 - Family::BLOOMFILTER.max_pre_longs as u64) * 64;
pub const MIN_NUM_HASHES: u16 = 1;
pub const MAX_NUM_HASHES: u16 = i16::MAX as u16;
pub fn with_accuracy(max_items: u64, fpp: f64) -> Self {
assert!(max_items > 0, "max_items must be greater than 0");
assert!(
fpp > 0.0 && fpp <= 1.0,
"fpp must be between 0.0 and 1.0 (inclusive of 1.0)"
);
let num_bits = Self::suggest_num_bits(max_items, fpp);
let num_hashes = Self::suggest_num_hashes_from_accuracy(max_items, num_bits);
BloomFilterBuilder {
num_bits,
num_hashes,
seed: DEFAULT_UPDATE_SEED,
}
}
pub fn with_size(num_bits: u64, num_hashes: u16) -> Self {
assert!(
(Self::MIN_NUM_BITS..=Self::MAX_NUM_BITS).contains(&num_bits),
"num_bits must be between {} and {}, got {}",
Self::MIN_NUM_BITS,
Self::MAX_NUM_BITS,
num_bits,
);
assert!(
(Self::MIN_NUM_HASHES..=Self::MAX_NUM_HASHES).contains(&num_hashes),
"num_bits must be between {} and {}, got {}",
Self::MIN_NUM_HASHES,
Self::MAX_NUM_HASHES,
num_hashes
);
BloomFilterBuilder {
num_bits,
num_hashes,
seed: DEFAULT_UPDATE_SEED,
}
}
pub fn seed(mut self, seed: u64) -> Self {
self.seed = seed;
self
}
pub fn build(self) -> BloomFilter {
let num_hashes = self.num_hashes;
let num_words = self.num_bits.div_ceil(64) as usize;
let bit_array = vec![0u64; num_words].into_boxed_slice();
BloomFilter {
seed: self.seed,
num_hashes,
num_bits_set: 0,
bit_array,
}
}
pub fn suggest_num_bits(max_items: u64, fpp: f64) -> u64 {
let n = max_items as f64;
let p = fpp;
let ln2_squared = std::f64::consts::LN_2 * std::f64::consts::LN_2;
let bits = (-n * p.ln() / ln2_squared).ceil() as u64;
bits.clamp(Self::MIN_NUM_BITS, Self::MAX_NUM_BITS)
}
pub fn suggest_num_hashes_from_accuracy(max_items: u64, num_bits: u64) -> u16 {
let m = num_bits as f64;
let n = max_items as f64;
let k = (m / n * std::f64::consts::LN_2).ceil();
k.clamp(
f64::from(Self::MIN_NUM_HASHES),
f64::from(Self::MAX_NUM_HASHES),
) as u16
}
pub fn suggest_num_hashes_from_fpp(fpp: f64) -> u16 {
let k = -fpp.log2();
k.ceil().clamp(
f64::from(Self::MIN_NUM_HASHES),
f64::from(Self::MAX_NUM_HASHES),
) as u16
}
}