const BLOOM_BITS: usize = 4096;
const BLOOM_BYTES: usize = BLOOM_BITS / 8;
const NUM_HASHES: u32 = 3;
pub(crate) struct BloomFilter {
bits: [u8; BLOOM_BYTES],
count: usize,
}
impl BloomFilter {
pub fn new() -> Self {
BloomFilter {
bits: [0; BLOOM_BYTES],
count: 0,
}
}
#[inline]
pub fn insert(&mut self, key: &[u8]) {
let h = Self::hash(key);
for i in 0..NUM_HASHES {
let bit = Self::nth_hash(h, i) % (BLOOM_BITS as u64);
self.bits[(bit / 8) as usize] |= 1 << (bit % 8);
}
self.count += 1;
}
#[inline]
pub fn may_contain(&self, key: &[u8]) -> bool {
if self.count == 0 {
return false;
}
let h = Self::hash(key);
for i in 0..NUM_HASHES {
let bit = Self::nth_hash(h, i) % (BLOOM_BITS as u64);
if self.bits[(bit / 8) as usize] & (1 << (bit % 8)) == 0 {
return false;
}
}
true
}
pub fn is_empty(&self) -> bool {
self.count == 0
}
pub fn clear(&mut self) {
self.bits = [0; BLOOM_BYTES];
self.count = 0;
}
#[inline]
fn hash(key: &[u8]) -> (u64, u64) {
let mut h1: u64 = 0xcbf29ce484222325;
for &b in key {
h1 ^= b as u64;
h1 = h1.wrapping_mul(0x100000001b3);
}
let h2 = h1.wrapping_mul(0x9e3779b97f4a7c15).rotate_left(31);
(h1, h2)
}
#[inline]
fn nth_hash(hashes: (u64, u64), n: u32) -> u64 {
hashes.0.wrapping_add((n as u64).wrapping_mul(hashes.1))
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn empty_filter_contains_nothing() {
let bf = BloomFilter::new();
assert!(bf.is_empty());
assert!(!bf.may_contain(b"hello"));
assert!(!bf.may_contain(b"world"));
}
#[test]
fn inserted_keys_are_found() {
let mut bf = BloomFilter::new();
bf.insert(b"hello");
bf.insert(b"world");
assert!(!bf.is_empty());
assert!(bf.may_contain(b"hello"));
assert!(bf.may_contain(b"world"));
}
#[test]
fn missing_keys_usually_not_found() {
let mut bf = BloomFilter::new();
for i in 0..100u32 {
bf.insert(&i.to_le_bytes());
}
let mut false_positives = 0;
for i in 1000..2000u32 {
if bf.may_contain(&i.to_le_bytes()) {
false_positives += 1;
}
}
assert!(false_positives < 100, "too many false positives: {}", false_positives);
}
#[test]
fn clear_resets_filter() {
let mut bf = BloomFilter::new();
bf.insert(b"hello");
assert!(bf.may_contain(b"hello"));
bf.clear();
assert!(bf.is_empty());
assert!(!bf.may_contain(b"hello"));
}
}