use std::hash::{Hash, Hasher};
use std::marker::PhantomData;
use ahash::AHasher;
use bit_vec::BitVec;
const FALSE_POS_PROB: f64 = -1.0;
const LN_2: f64 = core::f64::consts::LN_2;
const LN_2_SQR: f64 = LN_2 * LN_2;
pub struct BloomFilter<T: ?Sized> {
k: u64,
m: usize,
hashers: [AHasher; 2],
pub bitmap: BitVec,
_phantom: PhantomData<T>,
}
impl<T: ?Sized> BloomFilter<T> {
#[coverage(off)]
pub fn new(size: usize, false_pos_rate: f64) -> Self {
let k = Self::optimal_k(false_pos_rate);
let m = Self::optimal_m(false_pos_rate, size);
let bitmap = BitVec::from_elem(m, false);
let hashers = [
AHasher::new_with_keys(fastrand::u128(..), fastrand::u128(..)),
AHasher::new_with_keys(fastrand::u128(..), fastrand::u128(..)),
];
BloomFilter {
k,
m,
hashers,
bitmap,
_phantom: PhantomData,
}
}
#[coverage(off)]
fn optimal_m(false_pos_rate: f64, size: usize) -> usize {
((size as f64 * FALSE_POS_PROB * false_pos_rate.ln()) / LN_2_SQR).ceil() as usize
}
#[coverage(off)]
fn optimal_k(false_pos_rate: f64) -> u64 {
(false_pos_rate.ln() * FALSE_POS_PROB / LN_2).ceil() as u64
}
#[coverage(off)]
fn hash(&self, t: &T) -> (u64, u64)
where
T: Hash,
{
let hash1 = &mut self.hashers[0].clone();
let hash2 = &mut self.hashers[1].clone();
t.hash(hash1);
t.hash(hash2);
(hash1.finish(), hash2.finish())
}
#[coverage(off)]
fn find_index(&self, i: u64, hash1: u64, hash2: u64) -> usize {
hash1.wrapping_add((i).wrapping_mul(hash2)) as usize % self.m
}
#[coverage(off)]
pub fn insert(&mut self, t: &T)
where
T: Hash,
{
let (hash1, hash2) = self.hash(t);
for i in 0..self.k {
let index = self.find_index(i, hash1, hash2);
self.bitmap.set(index, true);
}
}
#[coverage(off)]
pub fn contains(&mut self, t: &T) -> bool
where
T: Hash,
{
let (hash1, hash2) = self.hash(t);
for i in 0..self.k {
let index = self.find_index(i, hash1, hash2);
if !self.bitmap.get(index).unwrap() {
return false;
}
}
true
}
}