use std::hash::{Hash, Hasher};
use std::marker::PhantomData;
use fnv::FnvHasher;
const NUM_BITS: u64 = 64;
const OFFSET_ONE: u64 = 0xcbf2_9ce4_8422_2325;
const OFFSET_TWO: u64 = 0xe10_3ad8_2dad_8028;
#[derive(Clone, Copy)]
pub(crate) struct Bloom<T: ?Sized> {
bits: u64,
data: PhantomData<T>,
entry_count: usize,
}
impl<T: ?Sized + Hash> Bloom<T> {
pub fn new() -> Self {
Self::default()
}
#[cfg(test)]
pub fn entry_count(&self) -> usize {
self.entry_count
}
#[allow(dead_code)]
pub fn to_raw(&self) -> u64 {
self.bits
}
pub fn clear(&mut self) {
self.bits = 0;
self.entry_count = 0;
}
pub fn add(&mut self, item: &T) {
let mask = self.make_bit_mask(item);
self.bits |= mask;
self.entry_count += 1;
}
pub fn may_contain(&self, item: &T) -> bool {
let mask = self.make_bit_mask(item);
self.bits & mask == mask
}
pub fn union(&self, other: Bloom<T>) -> Bloom<T> {
Bloom {
bits: self.bits | other.bits,
data: PhantomData,
entry_count: self.entry_count + other.entry_count,
}
}
#[inline]
fn make_bit_mask(&self, item: &T) -> u64 {
let hash1 = self.make_hash(item, OFFSET_ONE);
let hash2 = self.make_hash(item, OFFSET_TWO);
(1 << (hash1 % NUM_BITS)) | (1 << (hash2 % NUM_BITS))
}
#[inline]
fn make_hash(&self, item: &T, seed: u64) -> u64 {
let mut hasher = FnvHasher::with_key(seed);
item.hash(&mut hasher);
hasher.finish()
}
}
impl<T: ?Sized> std::fmt::Debug for Bloom<T> {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f, "Bloom: {:064b}: ({})", self.bits, self.entry_count)
}
}
impl<T: ?Sized> Default for Bloom<T> {
fn default() -> Self {
Bloom {
bits: 0,
data: PhantomData,
entry_count: 0,
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use test_log::test;
#[test]
fn very_good_test() {
let mut bloom = Bloom::default();
for i in 0..100 {
bloom.add(&i);
assert!(bloom.may_contain(&i));
}
bloom.clear();
for i in 0..100 {
assert!(!bloom.may_contain(&i));
}
}
#[test]
fn union() {
let mut bloom1 = Bloom::default();
bloom1.add(&0);
bloom1.add(&1);
assert!(!bloom1.may_contain(&2));
assert!(!bloom1.may_contain(&3));
let mut bloom2 = Bloom::default();
bloom2.add(&2);
bloom2.add(&3);
assert!(!bloom2.may_contain(&0));
assert!(!bloom2.may_contain(&1));
let bloom3 = bloom1.union(bloom2);
assert!(bloom3.may_contain(&0));
assert!(bloom3.may_contain(&1));
assert!(bloom3.may_contain(&2));
assert!(bloom3.may_contain(&3));
}
}