pub struct BloomFilter {
bits: Vec<u8>,
k: usize,
m: usize,
count: usize,
}
impl BloomFilter {
#[must_use]
pub fn new(capacity: usize, error_rate: f64) -> Self {
assert!(
error_rate > 0.0 && error_rate < 1.0,
"error_rate must be in (0,1)"
);
assert!(capacity > 0, "capacity must be > 0");
#[allow(
clippy::cast_precision_loss,
clippy::cast_possible_truncation,
clippy::cast_sign_loss
)]
let m = (-(capacity as f64) * error_rate.ln()
/ (std::f64::consts::LN_2 * std::f64::consts::LN_2))
.ceil() as usize;
#[allow(clippy::cast_precision_loss, clippy::cast_possible_truncation)]
#[allow(
clippy::cast_precision_loss,
clippy::cast_possible_truncation,
clippy::cast_sign_loss
)]
let k = ((m as f64 / capacity as f64) * std::f64::consts::LN_2).round() as usize;
Self {
bits: vec![0u8; m.div_ceil(8)],
k: k.max(1),
m,
count: 0,
}
}
fn hash(bytes: &[u8], seed: u32, m: usize) -> usize {
let mut h: u32 = 0x811c_9dc5_u32 ^ seed;
for &b in bytes {
h ^= u32::from(b);
h = h.wrapping_mul(0x0100_0193);
}
(h as usize) % m
}
pub fn add(&mut self, item: impl AsRef<[u8]>) {
let bytes = item.as_ref();
for i in 0..self.k {
#[allow(clippy::cast_possible_truncation)]
let idx = Self::hash(bytes, (i as u32).wrapping_mul(0x9e37_79b9), self.m);
self.bits[idx >> 3] |= 1 << (idx & 7);
}
self.count += 1;
}
#[must_use]
pub fn has(&self, item: impl AsRef<[u8]>) -> bool {
let bytes = item.as_ref();
(0..self.k).all(|i| {
#[allow(clippy::cast_possible_truncation)]
let idx = Self::hash(bytes, (i as u32).wrapping_mul(0x9e37_79b9), self.m);
self.bits[idx >> 3] & (1 << (idx & 7)) != 0
})
}
#[must_use]
pub const fn len(&self) -> usize {
self.count
}
#[must_use]
pub const fn is_empty(&self) -> bool {
self.count == 0
}
pub fn clear(&mut self) {
self.bits.fill(0);
self.count = 0;
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn no_false_negatives() {
let mut bf = BloomFilter::new(500, 0.01);
let items: Vec<String> = (0..100).map(|i| format!("item-{i}")).collect();
for x in &items {
bf.add(x);
}
assert!(items.iter().all(|x| bf.has(x.as_str())));
}
#[test]
fn absent_item_false() {
let mut bf = BloomFilter::new(1000, 0.001);
bf.add("seen");
assert!(!bf.has("unseen"));
}
#[test]
fn accepts_byte_slices() {
let mut bf = BloomFilter::new(100, 0.01);
bf.add(b"bytes" as &[u8]);
assert!(bf.has(b"bytes" as &[u8]));
assert!(!bf.has(b"other" as &[u8]));
}
#[test]
fn len_and_clear() {
let mut bf = BloomFilter::new(100, 0.01);
assert!(bf.is_empty());
bf.add("a");
bf.add("b");
assert_eq!(bf.len(), 2);
bf.clear();
assert!(bf.is_empty());
assert!(!bf.has("a"));
}
}