use super::BloomFilter;
use crate::hash::DEFAULT_UPDATE_SEED;
const MIN_NUM_BITS: u64 = 64;
const MAX_NUM_BITS: u64 = (1u64 << 35) - 64;
#[derive(Debug, Clone)]
pub struct BloomFilterBuilder {
num_bits: u64,
num_hashes: u16,
seed: u64,
}
impl BloomFilterBuilder {
pub fn with_accuracy(max_items: u64, fpp: f64) -> Self {
assert!(max_items > 0, "max_items must be greater than 0");
assert!(
fpp > 0.0 && fpp < 1.0,
"fpp must be between 0.0 and 1.0 (exclusive)"
);
let num_bits = Self::suggest_num_bits(max_items, fpp);
let num_hashes = Self::suggest_num_hashes_from_accuracy(max_items, num_bits);
BloomFilterBuilder {
num_bits,
num_hashes,
seed: DEFAULT_UPDATE_SEED,
}
}
pub fn with_size(num_bits: u64, num_hashes: u16) -> Self {
assert!(
num_bits >= MIN_NUM_BITS,
"num_bits must be at least {}",
MIN_NUM_BITS
);
assert!(
num_bits <= MAX_NUM_BITS,
"num_bits must not exceed {}",
MAX_NUM_BITS
);
assert!(num_hashes >= 1, "num_hashes must be at least 1");
assert!(num_hashes <= 100, "num_hashes must not exceed 100");
BloomFilterBuilder {
num_bits,
num_hashes,
seed: DEFAULT_UPDATE_SEED,
}
}
pub fn seed(mut self, seed: u64) -> Self {
self.seed = seed;
self
}
pub fn build(self) -> BloomFilter {
let capacity_bits = self.num_bits;
let num_hashes = self.num_hashes;
let num_words = capacity_bits.div_ceil(64) as usize;
let bit_array = vec![0u64; num_words];
BloomFilter {
seed: self.seed,
num_hashes,
capacity_bits,
num_bits_set: 0,
bit_array,
}
}
pub fn suggest_num_bits(max_items: u64, fpp: f64) -> u64 {
let n = max_items as f64;
let p = fpp;
let ln2_squared = std::f64::consts::LN_2 * std::f64::consts::LN_2;
let bits = (-n * p.ln() / ln2_squared).ceil() as u64;
let bits = bits.div_ceil(64) * 64;
bits.clamp(MIN_NUM_BITS, MAX_NUM_BITS)
}
pub fn suggest_num_hashes_from_accuracy(max_items: u64, num_bits: u64) -> u16 {
let m = num_bits as f64;
let n = max_items as f64;
let k = (m / n * std::f64::consts::LN_2).round();
(k as u16).clamp(1, 100) }
pub fn suggest_num_hashes_from_fpp(fpp: f64) -> u16 {
let k = -fpp.log2();
(k.round() as u16).clamp(1, 100)
}
}