#![allow(clippy::cast_precision_loss)]
#![allow(clippy::cast_possible_truncation)]
#![allow(clippy::cast_sign_loss)]
#![allow(clippy::cast_possible_wrap)]
use parking_lot::RwLock;
use rustc_hash::FxHasher;
use std::hash::{Hash, Hasher};
pub struct BloomFilter {
bits: RwLock<Vec<u64>>,
num_bits: usize,
num_hashes: u32,
count: RwLock<usize>,
}
impl BloomFilter {
#[must_use]
pub fn new(capacity: usize, false_positive_rate: f64) -> Self {
let num_bits = Self::optimal_bits(capacity, false_positive_rate);
let num_hashes = Self::optimal_hashes(num_bits, capacity);
Self::with_params(num_bits, num_hashes)
}
#[must_use]
pub fn with_params(num_bits: usize, num_hashes: u32) -> Self {
let num_words = num_bits.div_ceil(64);
Self {
bits: RwLock::new(vec![0u64; num_words]),
num_bits,
num_hashes,
count: RwLock::new(0),
}
}
pub fn insert<T: Hash>(&self, item: &T) {
let mut bits = self.bits.write();
for i in 0..self.num_hashes {
let (word_index, bit_mask) = self.bit_position(item, i);
bits[word_index] |= bit_mask;
}
*self.count.write() += 1;
}
#[must_use]
pub fn contains<T: Hash>(&self, item: &T) -> bool {
let bits = self.bits.read();
for i in 0..self.num_hashes {
let (word_index, bit_mask) = self.bit_position(item, i);
if bits[word_index] & bit_mask == 0 {
return false;
}
}
true
}
#[must_use]
pub fn definitely_not_contains<T: Hash>(&self, item: &T) -> bool {
!self.contains(item)
}
#[must_use]
pub fn count(&self) -> usize {
*self.count.read()
}
pub fn clear(&self) {
let mut bits = self.bits.write();
for word in bits.iter_mut() {
*word = 0;
}
*self.count.write() = 0;
}
#[must_use]
pub fn estimated_fpr(&self) -> f64 {
let bits = self.bits.read();
let set_bits: usize = bits.iter().map(|w| w.count_ones() as usize).sum();
let fill_ratio = set_bits as f64 / self.num_bits as f64;
fill_ratio.powi(self.num_hashes as i32)
}
#[inline]
fn bit_position<T: Hash>(&self, item: &T, seed: u32) -> (usize, u64) {
let hash = Self::hash_with_seed(item, seed);
let bit_index = (hash as usize) % self.num_bits;
(bit_index / 64, 1u64 << (bit_index % 64))
}
fn optimal_bits(capacity: usize, fpr: f64) -> usize {
let ln2_sq = std::f64::consts::LN_2 * std::f64::consts::LN_2;
(-(capacity as f64) * fpr.ln() / ln2_sq).ceil() as usize
}
fn optimal_hashes(num_bits: usize, capacity: usize) -> u32 {
let k = (num_bits as f64 / capacity as f64) * std::f64::consts::LN_2;
k.ceil() as u32
}
fn hash_with_seed<T: Hash>(item: &T, seed: u32) -> u64 {
let mut hasher = FxHasher::default();
seed.hash(&mut hasher);
item.hash(&mut hasher);
hasher.finish()
}
}
impl Default for BloomFilter {
fn default() -> Self {
Self::new(10_000, 0.01)
}
}