extern crate fnv;
use fnv::FnvHasher;
use std::hash::Hasher;
const KEY_SIZE: usize = 12;
const ARRAY_SIZE: usize = 1 << KEY_SIZE;
const KEY_MASK: u32 = (1 << KEY_SIZE) - 1;
const KEY_SHIFT: usize = 16;
pub struct BloomFilter {
counters: [u8; ARRAY_SIZE],
}
impl Clone for BloomFilter {
#[inline]
fn clone(&self) -> BloomFilter {
BloomFilter {
counters: self.counters,
}
}
}
impl BloomFilter {
#[inline]
pub fn new() -> BloomFilter {
BloomFilter {
counters: [0; ARRAY_SIZE],
}
}
#[inline]
fn first_slot(&self, hash: u32) -> &u8 {
&self.counters[hash1(hash) as usize]
}
#[inline]
fn first_mut_slot(&mut self, hash: u32) -> &mut u8 {
&mut self.counters[hash1(hash) as usize]
}
#[inline]
fn second_slot(&self, hash: u32) -> &u8 {
&self.counters[hash2(hash) as usize]
}
#[inline]
fn second_mut_slot(&mut self, hash: u32) -> &mut u8 {
&mut self.counters[hash2(hash) as usize]
}
#[inline]
pub fn clear(&mut self) {
self.counters = [0; ARRAY_SIZE]
}
#[inline]
fn insert_hash(&mut self, hash: u32) {
{
let slot1 = self.first_mut_slot(hash);
if !full(slot1) {
*slot1 += 1
}
}
{
let slot2 = self.second_mut_slot(hash);
if !full(slot2) {
*slot2 += 1
}
}
}
#[inline]
pub fn insert<T:BloomHash>(&mut self, elem: &T) {
self.insert_hash(elem.bloom_hash())
}
#[inline]
fn remove_hash(&mut self, hash: u32) {
{
let slot1 = self.first_mut_slot(hash);
if !full(slot1) {
*slot1 -= 1
}
}
{
let slot2 = self.second_mut_slot(hash);
if !full(slot2) {
*slot2 -= 1
}
}
}
#[inline]
pub fn remove<T:BloomHash>(&mut self, elem: &T) {
self.remove_hash(elem.bloom_hash())
}
#[inline]
fn might_contain_hash(&self, hash: u32) -> bool {
*self.first_slot(hash) != 0 && *self.second_slot(hash) != 0
}
#[inline]
pub fn might_contain<T:BloomHash>(&self, elem: &T) -> bool {
self.might_contain_hash(elem.bloom_hash())
}
}
pub trait BloomHash {
fn bloom_hash(&self) -> u32;
}
impl BloomHash for u64 {
#[inline]
fn bloom_hash(&self) -> u32 {
((*self >> 32) ^ *self) as u32
}
}
impl BloomHash for isize {
#[allow(exceeding_bitshifts)]
#[inline]
fn bloom_hash(&self) -> u32 {
((*self >> 32) ^ *self) as u32
}
}
impl BloomHash for usize {
#[allow(exceeding_bitshifts)]
#[inline]
fn bloom_hash(&self) -> u32 {
((*self >> 32) ^ *self) as u32
}
}
impl BloomHash for String {
#[inline]
fn bloom_hash(&self) -> u32 {
let mut h = FnvHasher::default();
h.write(self.as_bytes());
h.finish().bloom_hash()
}
}
#[inline]
fn full(slot: &u8) -> bool {
*slot == 0xff
}
#[inline]
fn hash1(hash: u32) -> u32 {
hash & KEY_MASK
}
#[inline]
fn hash2(hash: u32) -> u32 {
(hash >> KEY_SHIFT) & KEY_MASK
}
#[test]
fn create_and_insert_some_stuff() {
let mut bf = BloomFilter::new();
for i in 0_usize .. 1000 {
bf.insert(&i);
}
for i in 0_usize .. 1000 {
assert!(bf.might_contain(&i));
}
let false_positives =
(1001_usize .. 2000).filter(|i| bf.might_contain(i)).count();
assert!(false_positives < 10);
for i in 0_usize .. 100 {
bf.remove(&i);
}
for i in 100_usize .. 1000 {
assert!(bf.might_contain(&i));
}
let false_positives = (0_usize .. 100).filter(|i| bf.might_contain(i)).count();
assert!(false_positives < 2);
bf.clear();
for i in 0_usize .. 2000 {
assert!(!bf.might_contain(&i));
}
}
#[cfg(test)]
mod bench {
extern crate test;
use std::hash::{Hash, Hasher, SipHasher};
use super::BloomFilter;
#[bench]
fn create_insert_1000_remove_100_lookup_100(b: &mut test::Bencher) {
b.iter(|| {
let mut bf = BloomFilter::new();
for i in 0_usize .. 1000 {
bf.insert(&i);
}
for i in 0_usize .. 100 {
bf.remove(&i);
}
for i in 100_usize .. 200 {
test::black_box(bf.might_contain(&i));
}
});
}
#[bench]
fn might_contain(b: &mut test::Bencher) {
let mut bf = BloomFilter::new();
for i in 0_usize .. 1000 {
bf.insert(&i);
}
let mut i = 0_usize;
b.bench_n(1000, |b| {
b.iter(|| {
test::black_box(bf.might_contain(&i));
i += 1;
});
});
}
#[bench]
fn insert(b: &mut test::Bencher) {
let mut bf = BloomFilter::new();
b.bench_n(1000, |b| {
let mut i = 0_usize;
b.iter(|| {
test::black_box(bf.insert(&i));
i += 1;
});
});
}
#[bench]
fn remove(b: &mut test::Bencher) {
let mut bf = BloomFilter::new();
for i in 0_usize .. 1000 {
bf.insert(&i);
}
b.bench_n(1000, |b| {
let mut i = 0_usize;
b.iter(|| {
bf.remove(&i);
i += 1;
});
});
test::black_box(bf.might_contain(&0_usize));
}
#[bench]
fn hash_a_uint(b: &mut test::Bencher) {
let mut i = 0_usize;
b.iter(|| {
let mut hasher = SipHasher::default();
i.hash(&mut hasher);
test::black_box(hasher.finish());
i += 1;
})
}
}