use alloc::vec;
use alloc::vec::Vec;
pub struct BloomFilter {
bits: Vec<u8>,
k: usize,
m: usize,
count: usize,
}
impl BloomFilter {
#[cfg(feature = "std")]
#[must_use]
pub fn new(capacity: usize, error_rate: f64) -> Self {
assert!(
error_rate > 0.0 && error_rate < 1.0,
"error_rate must be in (0,1)"
);
assert!(capacity > 0, "capacity must be > 0");
#[allow(
clippy::cast_precision_loss,
clippy::cast_possible_truncation,
clippy::cast_sign_loss
)]
let m = (-(capacity as f64) * error_rate.ln()
/ (core::f64::consts::LN_2 * core::f64::consts::LN_2))
.ceil() as usize;
#[allow(clippy::cast_precision_loss, clippy::cast_possible_truncation)]
#[allow(
clippy::cast_precision_loss,
clippy::cast_possible_truncation,
clippy::cast_sign_loss
)]
let k = ((m as f64 / capacity as f64) * core::f64::consts::LN_2).round() as usize;
Self::from_raw_params(m, k.max(1))
}
#[must_use]
pub fn from_raw_params(bit_count: usize, hash_count: usize) -> Self {
assert!(bit_count > 0, "bit_count must be > 0");
assert!(hash_count > 0, "hash_count must be > 0");
Self {
bits: vec![0u8; bit_count.div_ceil(8)],
k: hash_count,
m: bit_count,
count: 0,
}
}
fn hash(bytes: &[u8], seed: u32, m: usize) -> usize {
let mut h: u32 = 0x811c_9dc5_u32 ^ seed;
for &b in bytes {
h ^= u32::from(b);
h = h.wrapping_mul(0x0100_0193);
}
(h as usize) % m
}
pub fn add(&mut self, item: impl AsRef<[u8]>) {
let bytes = item.as_ref();
for i in 0..self.k {
#[allow(clippy::cast_possible_truncation)]
let idx = Self::hash(bytes, (i as u32).wrapping_mul(0x9e37_79b9), self.m);
self.bits[idx >> 3] |= 1 << (idx & 7);
}
self.count = self.count.saturating_add(1);
}
#[must_use]
pub fn has(&self, item: impl AsRef<[u8]>) -> bool {
let bytes = item.as_ref();
(0..self.k).all(|i| {
#[allow(clippy::cast_possible_truncation)]
let idx = Self::hash(bytes, (i as u32).wrapping_mul(0x9e37_79b9), self.m);
self.bits[idx >> 3] & (1 << (idx & 7)) != 0
})
}
#[must_use]
pub const fn len(&self) -> usize {
self.count
}
#[must_use]
pub const fn is_empty(&self) -> bool {
self.count == 0
}
pub fn clear(&mut self) {
self.bits.fill(0);
self.count = 0;
}
}
#[cfg(all(test, feature = "std"))]
mod tests {
use super::*;
#[test]
fn no_false_negatives() {
let mut bf = BloomFilter::new(500, 0.01);
let items: Vec<String> = (0..100).map(|i| format!("item-{i}")).collect();
for x in &items {
bf.add(x);
}
assert!(items.iter().all(|x| bf.has(x.as_str())));
}
#[test]
fn absent_item_false() {
let mut bf = BloomFilter::new(1000, 0.001);
bf.add("seen");
assert!(!bf.has("unseen"));
}
#[test]
fn accepts_byte_slices() {
let mut bf = BloomFilter::new(100, 0.01);
bf.add(b"bytes" as &[u8]);
assert!(bf.has(b"bytes" as &[u8]));
assert!(!bf.has(b"other" as &[u8]));
}
#[test]
fn len_and_clear() {
let mut bf = BloomFilter::new(100, 0.01);
assert!(bf.is_empty());
bf.add("a");
bf.add("b");
assert_eq!(bf.len(), 2);
bf.clear();
assert!(bf.is_empty());
assert!(!bf.has("a"));
}
#[test]
#[should_panic(expected = "capacity must be > 0")]
fn panics_on_zero_capacity() {
let _ = BloomFilter::new(0, 0.01);
}
#[test]
#[should_panic(expected = "error_rate must be in (0,1)")]
fn panics_on_zero_error_rate() {
let _ = BloomFilter::new(100, 0.0);
}
#[test]
#[should_panic(expected = "error_rate must be in (0,1)")]
fn panics_on_one_error_rate() {
let _ = BloomFilter::new(100, 1.0);
}
#[test]
fn from_raw_params_works() {
let mut bf = BloomFilter::from_raw_params(1024, 7);
bf.add("hello");
assert!(bf.has("hello"));
assert!(!bf.has("world"));
}
#[test]
fn capacity_one() {
let mut bf = BloomFilter::new(1, 0.01);
bf.add("only");
assert!(bf.has("only"));
}
#[test]
fn high_error_rate() {
let mut bf = BloomFilter::new(100, 0.99);
bf.add("x");
assert!(bf.has("x"));
}
#[test]
fn empty_key() {
let mut bf = BloomFilter::new(100, 0.01);
bf.add("");
assert!(bf.has(""));
}
#[test]
fn false_positive_rate_within_bounds() {
let capacity = 10_000;
let target_fpr = 0.05;
let mut bf = BloomFilter::new(capacity, target_fpr);
for i in 0..capacity {
bf.add(format!("present-{i}"));
}
let test_count = 50_000;
let mut false_positives = 0;
for i in 0..test_count {
if bf.has(format!("absent-{i}")) {
false_positives += 1;
}
}
let observed_fpr = f64::from(false_positives) / f64::from(test_count);
assert!(
observed_fpr < target_fpr * 2.0,
"Observed FPR {observed_fpr:.4} exceeds 2x target {target_fpr}"
);
}
#[test]
fn empty_filter_has_returns_false() {
let bf = BloomFilter::new(100, 0.01);
assert!(!bf.has("anything"));
assert!(!bf.has(""));
assert!(!bf.has(b"bytes" as &[u8]));
}
#[test]
fn single_item_insert_and_query() {
let mut bf = BloomFilter::new(1000, 0.01);
bf.add("only-one");
assert!(bf.has("only-one"));
assert_eq!(bf.len(), 1);
assert!(!bf.is_empty());
}
#[test]
fn clear_then_has_returns_false() {
let mut bf = BloomFilter::new(100, 0.01);
bf.add("x");
bf.add("y");
assert!(bf.has("x"));
bf.clear();
assert!(!bf.has("x"));
assert!(!bf.has("y"));
assert_eq!(bf.len(), 0);
assert!(bf.is_empty());
}
#[test]
fn duplicate_add_does_not_corrupt() {
let mut bf = BloomFilter::new(100, 0.01);
bf.add("dup");
bf.add("dup");
bf.add("dup");
assert!(bf.has("dup"));
assert_eq!(bf.len(), 3);
}
#[test]
fn from_raw_params_minimal() {
let mut bf = BloomFilter::from_raw_params(1, 1);
bf.add("a");
assert!(bf.has("a"));
}
#[test]
#[should_panic(expected = "bit_count must be > 0")]
fn from_raw_params_zero_bits_panics() {
let _ = BloomFilter::from_raw_params(0, 1);
}
#[test]
#[should_panic(expected = "hash_count must be > 0")]
fn from_raw_params_zero_hashes_panics() {
let _ = BloomFilter::from_raw_params(1, 0);
}
}