use core::{
f64::consts::LN_2,
hash::{Hash, Hasher},
};
use libm::{ceil, log, log2, pow};
use rand_core::RngCore;
use siphasher::sip::SipHasher13;
use crate::bit_vec::BitVec;
#[derive(Debug, Clone)]
pub struct Bloom {
bits: BitVec,
hash_fn_number: usize,
hashers: [SipHasher13; 2],
}
impl Bloom {
pub fn new_with_key(items: usize, err_rate: f64, keys: [(u64, u64); 2]) -> Self {
let bits_size = Self::calculate_bits_vec_size(items, err_rate);
let hash_fn_number = Self::calculate_hash_fn_number(err_rate);
let [key0, key1] = keys;
let hashers = [
SipHasher13::new_with_keys(key0.0, key0.1),
SipHasher13::new_with_keys(key1.0, key1.1),
];
Self {
bits: BitVec::new(bits_size),
hash_fn_number,
hashers,
}
}
pub fn new_with_rng<R: RngCore>(items: usize, err_rate: f64, rng: &mut R) -> Self {
let hash_fn_number = Self::calculate_hash_fn_number(err_rate);
let keys = [
(rng.next_u64(), rng.next_u64()),
(rng.next_u64(), rng.next_u64()),
];
let hashers = [
SipHasher13::new_with_keys(keys[0].0, keys[0].1),
SipHasher13::new_with_keys(keys[1].0, keys[1].1),
];
Self {
bits: BitVec::new(Self::calculate_bits_vec_size(items, err_rate)),
hash_fn_number,
hashers,
}
}
pub fn set<T>(&mut self, item: &T)
where
T: Hash,
{
let (h1, h2) = self.bloom_hash(item);
for i in 0..self.hash_fn_number {
let index = self.get_index((h1, h2), i as u64);
self.bits.set(index);
}
}
pub fn contain<T>(&self, item: &T) -> bool
where
T: Hash,
{
let (h1, h2) = self.bloom_hash(item);
for i in 0..self.hash_fn_number {
let index = self.get_index((h1, h2), i as u64);
if !self.bits.contain(index) {
return false;
}
}
true
}
#[inline]
fn bloom_hash<T>(&self, item: &T) -> (u64, u64)
where
T: Hash,
{
let mut hasher1 = self.hashers[0];
let mut hasher2 = self.hashers[1];
item.hash(&mut hasher1);
item.hash(&mut hasher2);
(hasher1.finish(), hasher2.finish())
}
#[inline]
fn get_index(&self, (h1, h2): (u64, u64), i: u64) -> usize {
let len = self.bits.len() as u64;
(h1.wrapping_add(i.wrapping_mul(h2)) % len) as usize
}
#[inline]
fn calculate_bits_vec_size(items: usize, fp_rate: f64) -> usize {
assert!(items > 0, "Number of items must be > 0");
assert!(
(0.0..1.0).contains(&fp_rate),
"False positive rate must be between 0 and 1"
);
ceil(-((items as f64 * log(fp_rate)) / (pow(LN_2, 2.0) * 8.0))) as usize
}
#[inline]
fn calculate_hash_fn_number(fp_rate: f64) -> usize {
ceil(-log2(fp_rate)) as usize
}
#[inline]
pub fn capacity_in_bits(&self) -> usize {
self.bits.capacity_in_bits()
}
#[inline]
pub fn clear(&mut self) {
self.bits.clear();
}
}