extern crate core;
extern crate bit_vec;
use bit_vec::BitVec;
use std::cmp::{min,max};
use std::collections::hash_map::RandomState;
use std::hash::{BuildHasher,Hash};
use super::{ASMS,Intersectable,Unionable};
use super::hashing::HashIter;
pub struct BloomFilter<R = RandomState, S = RandomState> {
bits: BitVec,
num_hashes: u32,
hash_builder_one: R,
hash_builder_two: S,
}
impl BloomFilter<RandomState, RandomState> {
pub fn with_size(num_bits: usize, num_hashes: u32) -> BloomFilter<RandomState, RandomState> {
BloomFilter {
bits: BitVec::from_elem(num_bits,false),
num_hashes: num_hashes,
hash_builder_one: RandomState::new(),
hash_builder_two: RandomState::new(),
}
}
pub fn with_rate(rate: f32, expected_num_items: u32) -> BloomFilter<RandomState, RandomState> {
let bits = needed_bits(rate,expected_num_items);
BloomFilter::with_size(bits,optimal_num_hashes(bits,expected_num_items))
}
}
impl<R,S> BloomFilter<R,S>
where R: BuildHasher, S: BuildHasher
{
pub fn with_size_and_hashers(num_bits: usize, num_hashes: u32,
hash_builder_one: R, hash_builder_two: S) -> BloomFilter<R,S> {
BloomFilter {
bits: BitVec::from_elem(num_bits,false),
num_hashes: num_hashes,
hash_builder_one: hash_builder_one,
hash_builder_two: hash_builder_two,
}
}
pub fn with_rate_and_hashers(rate: f32, expected_num_items: u32,
hash_builder_one: R, hash_builder_two: S) -> BloomFilter<R, S> {
let bits = needed_bits(rate,expected_num_items);
BloomFilter::with_size_and_hashers(bits,optimal_num_hashes(bits,expected_num_items),
hash_builder_one,hash_builder_two)
}
pub fn num_bits(&self) -> usize {
self.bits.len()
}
pub fn num_hashes(&self) -> u32 {
self.num_hashes
}
}
impl<R,S> ASMS for BloomFilter<R,S>
where R: BuildHasher, S: BuildHasher {
fn insert<T: Hash>(& mut self,item: &T) -> bool {
let mut contained = true;
for h in HashIter::from(item,
self.num_hashes,
&self.hash_builder_one,
&self.hash_builder_two) {
let idx = (h % self.bits.len() as u64) as usize;
match self.bits.get(idx) {
Some(b) => {
if !b {
contained = false;
}
}
None => { panic!("Hash mod failed in insert"); }
}
self.bits.set(idx,true)
}
!contained
}
fn contains<T: Hash>(&self, item: &T) -> bool {
for h in HashIter::from(item,
self.num_hashes,
&self.hash_builder_one,
&self.hash_builder_two) {
let idx = (h % self.bits.len() as u64) as usize;
match self.bits.get(idx) {
Some(b) => {
if !b {
return false;
}
}
None => { panic!("Hash mod failed"); }
}
}
true
}
fn clear(&mut self) {
self.bits.clear();
}
}
impl Intersectable for BloomFilter {
fn intersect(&mut self, other: &BloomFilter) -> bool {
self.bits.intersect(&other.bits)
}
}
impl Unionable for BloomFilter {
fn union(&mut self, other: &BloomFilter) -> bool {
self.bits.union(&other.bits)
}
}
pub fn optimal_num_hashes(num_bits: usize, num_items: u32) -> u32 {
min(
max(
(num_bits as f32 / num_items as f32 * core::f32::consts::LN_2).round() as u32,
2
),
200
)
}
pub fn needed_bits(false_pos_rate:f32, num_items: u32) -> usize {
let ln22 = core::f32::consts::LN_2 * core::f32::consts::LN_2;
(num_items as f32 * ((1.0/false_pos_rate).ln() / ln22)).round() as usize
}
#[cfg(test)]
extern crate rand;
#[cfg(feature = "do-bench")]
#[cfg(test)]
mod bench {
extern crate test;
use self::test::Bencher;
use bloom::rand::{self,Rng};
use super::BloomFilter;
use ASMS;
#[bench]
fn insert_benchmark(b: &mut Bencher) {
let cnt = 500000;
let rate = 0.01 as f32;
let mut bf:BloomFilter = BloomFilter::with_rate(rate,cnt);
let mut rng = rand::thread_rng();
b.iter(|| {
let v = rng.gen::<i32>();
bf.insert(&v);
})
}
#[bench]
fn contains_benchmark(b: &mut Bencher) {
let cnt = 500000;
let rate = 0.01 as f32;
let mut bf:BloomFilter = BloomFilter::with_rate(rate,cnt);
let mut rng = rand::thread_rng();
let mut i = 0;
while i < cnt {
let v = rng.gen::<i32>();
bf.insert(&v);
i+=1;
}
b.iter(|| {
let v = rng.gen::<i32>();
bf.contains(&v);
})
}
}
#[cfg(test)]
mod tests {
use std::collections::HashSet;
use bloom::rand::{self,Rng};
use super::{BloomFilter,needed_bits,optimal_num_hashes};
use {ASMS,Intersectable,Unionable};
#[test]
fn simple() {
let mut b:BloomFilter = BloomFilter::with_rate(0.01,100);
b.insert(&1);
assert!(b.contains(&1));
assert!(!b.contains(&2));
b.clear();
assert!(!b.contains(&1));
}
#[test]
fn intersect() {
let mut b1:BloomFilter = BloomFilter::with_rate(0.01,20);
b1.insert(&1);
b1.insert(&2);
let mut b2:BloomFilter = BloomFilter::with_rate(0.01,20);
b2.insert(&1);
b1.intersect(&b2);
assert!(b1.contains(&1));
assert!(!b1.contains(&2));
}
#[test]
fn union() {
let mut b1:BloomFilter = BloomFilter::with_rate(0.01,20);
b1.insert(&1);
let mut b2:BloomFilter = BloomFilter::with_rate(0.01,20);
b2.insert(&2);
b1.union(&b2);
assert!(b1.contains(&1));
assert!(b1.contains(&2));
}
#[test]
fn fpr_test() {
let cnt = 500000;
let rate = 0.01 as f32;
let bits = needed_bits(rate,cnt);
assert_eq!(bits, 4792529);
let hashes = optimal_num_hashes(bits,cnt);
assert_eq!(hashes, 7);
let mut b:BloomFilter = BloomFilter::with_rate(rate,cnt);
let mut set:HashSet<i32> = HashSet::new();
let mut rng = rand::thread_rng();
let mut i = 0;
while i < cnt {
let v = rng.gen::<i32>();
set.insert(v);
b.insert(&v);
i+=1;
}
i = 0;
let mut false_positives = 0;
while i < cnt {
let v = rng.gen::<i32>();
match (b.contains(&v),set.contains(&v)) {
(true, false) => { false_positives += 1; }
(false, true) => { assert!(false); } _ => {}
}
i+=1;
}
let actual_rate = false_positives as f32 / cnt as f32;
assert!(actual_rate > (rate-0.001));
assert!(actual_rate < (rate+0.001));
}
}