[−][src]Struct bloom_filter_simple::KMBloomFilter

pub struct KMBloomFilter<H1, H2> where
    H1: Hasher + Default,
    H2: Hasher + Default,  { /* fields omitted */ }

Bloom filter implementation using the improvements described by Kirsch and Mitzenmacher:

Kirsch A., Mitzenmacher M. (2006) Less Hashing, Same Performance: Building a Better Bloom Filter. In: Azar Y., Erlebach T. (eds) Algorithms – ESA 2006. ESA 2006. Lecture Notes in Computer Science, vol 4168. Springer, Berlin, Heidelberg. https://doi.org/10.1007/11841036_42

Examples

use bloom_filter_simple::{BloomFilter,KMBloomFilter};
use ahash::AHasher;
use std::collections::hash_map::DefaultHasher;

fn main() {
    // We plan on storing at most 10,000 elements
    let desired_capacity = 10_000;
    // We want to assure that the chance of a false positive is less than 0.0001.
    let desired_fp_probability = 0.0001;

    // We initialize a new KMBloomFilter by specifying the desired Hashers as type parameters.
    let mut filter: KMBloomFilter<AHasher, DefaultHasher> = KMBloomFilter::new(desired_capacity, desired_fp_probability);

    // You can insert any type implementing the Hash trait. The bloom filter does not store the
    // inserted elements but only their hashes. Hence, there is no transfer of ownership required.
    filter.insert(&5i32);
    filter.insert(&"Some text");
    filter.insert(&10_000usize);

    // You can check whether a value has been inserted into the filter before.
    assert_eq!(false, filter.contains(&3));
    assert_eq!(true, filter.contains(&5));
    assert_eq!(true, filter.contains(&"Some text"));
}

Implementations

`impl<H1, H2> KMBloomFilter<H1, H2> where H1: Hasher + Default, H2: Hasher + Default,` [src]

`pub fn new( desired_capacity: usize, desired_false_positive_probability: f64 ) -> Self`[src]

Initialize a new instance of KMBloomFilter that guarantees that the false positive rate is less than desired_false_positive_probability for up to desired_capacity elements.

KMBloomFilter uses two hash functions H1 and H2 to simulate an arbitrary number of hash functions. H1 and H2 are specified as type parameters (see examples): KMBloomFilter<H1, H2>.

You have to use two different hash functions for H1 and H2!

Panics

Panics if desired_capacity == 0

Examples

use bloom_filter_simple::{BloomFilter,KMBloomFilter};
use ahash::AHasher;
use std::collections::hash_map::DefaultHasher;

fn main() {
    // We plan on storing at most 10,000 elements
    let desired_capacity = 10_000;
    // We want to assure that the chance of a false positive is less than 0.0001.
    let desired_fp_probability = 0.0001;

    // We initialize a new KMBloomFilter by specifying the desired Hashers as type parameters
    let mut filter: KMBloomFilter<AHasher, DefaultHasher> =
        KMBloomFilter::new(desired_capacity, desired_fp_probability);
}

`pub fn approximate_element_count(&self) -> f64`[src]

Approximate number of elements stored. Approximation technique taken from Wikipedia:

Wikipedia, "Bloom filter" [Accessed: 02.12.2020]

`pub fn approximate_current_false_positive_probability(&self) -> f64`[src]

Return the current approximate false positive probability which depends on the current number of elements in the filter.

The probability is given as a value in the interval [0,1] Approximation technique taken from Sagi Kedmi:

S. Kedmi, "Bloom Filters for the Perplexed", July 2017 [Accessed: 02.12.2020]

`pub fn union(&self, other: &Self) -> Self`[src]

Creates a union of this bloom filter and 'other', which means 'contains' of the resulting bloom filter will always return true for elements inserted in either this bloom filter or in 'other' before creation.

Panics

Panics if the desired capacity or desired false positive probability of 'self' and 'other' differ.

Examples

Union of two bloom filters with the same configuration.

use bloom_filter_simple::{BloomFilter,KMBloomFilter};
use ahash::AHasher;
use std::collections::hash_map::DefaultHasher;

fn main() {
    // The configuration of both bloom filters has to be the same
    let desired_capacity = 10_000;
    let desired_fp_probability = 0.0001;

    // We initialize two new KMBloomFilter 
    let mut filter_one: KMBloomFilter<AHasher, DefaultHasher> = KMBloomFilter::new(
        desired_capacity,
        desired_fp_probability
    );
 
    let mut filter_two: KMBloomFilter<AHasher, DefaultHasher> = KMBloomFilter::new(
        desired_capacity,
        desired_fp_probability
    );
 
    // Insert elements into the first filter
    filter_one.insert(&0);
    filter_one.insert(&1);
 
    // Insert elements into the second filter
    filter_two.insert(&2);
    filter_two.insert(&3);
     
    // Now we retrieve the union of both filters
    let filter_union = filter_one.union(&filter_two);

    // The union will return true for a 'contains' check for the elements inserted 
    // previously into at least one of the constituent filters.
    assert_eq!(true, filter_union.contains(&0));
    assert_eq!(true, filter_union.contains(&1));
    assert_eq!(true, filter_union.contains(&2));
    assert_eq!(true, filter_union.contains(&3));
}

`pub fn intersect(&self, other: &Self) -> Self`[src]

Creates a intersection of this bloom filter and 'other', which means 'contains' of the resulting bloom filter will always return true for elements inserted both in this bloom filter and in 'other' before creation. The false positive probability of the resulting bloom filter is at most the false positive probability of 'other' or 'self'. The false positive probability of the resulting bloom filter may be bigger than the false positive probability of a new empty bloom filter with the intersecting elements inserted. The functions 'approximate_current_false_positive_probability' and 'approximate_element_count' called on the resulting bloom filter may return too big approximations.

Panics

Panics if the desired capacity or desired false positive probability of 'self' and 'other' differ.

Examples

Intersection of two bloom filters with the same configuration.

use bloom_filter_simple::{BloomFilter,KMBloomFilter};
use ahash::AHasher;
use std::collections::hash_map::DefaultHasher;

fn main() {
    // The configuration of both bloom filters has to be the same
    let desired_capacity = 10_000;
    let desired_fp_probability = 0.0001;

    // We initialize two new KMBloomFilter 
    let mut filter_one: KMBloomFilter<AHasher, DefaultHasher> = KMBloomFilter::new(
        desired_capacity,
        desired_fp_probability
    );
 
    let mut filter_two: KMBloomFilter<AHasher, DefaultHasher> = KMBloomFilter::new(
        desired_capacity,
        desired_fp_probability
    );
 
    // Insert elements into the first filter
    filter_one.insert(&0);
    filter_one.insert(&1);
 
    // Insert elements into the second filter
    filter_two.insert(&1);
    filter_two.insert(&2);
     
    // Now we retrieve the intersection of both filters
    let filter_intersection = filter_one.intersect(&filter_two);

    // The intersection will return true for a 'contains' check for the elements inserted 
    // previously into both constituent filters.
    assert_eq!(false, filter_intersection.contains(&0));
    assert_eq!(true, filter_intersection.contains(&1));
    assert_eq!(false, filter_intersection.contains(&2));
}

`pub fn eq_configuration(&self, other: &Self) -> bool`[src]

Checks whether two bloom filters were created with the same desired capacity and desired false positive probability.

Trait Implementations

`impl<H1, H2> BloomFilter for KMBloomFilter<H1, H2> where H1: Hasher + Default, H2: Hasher + Default,` [src]

`pub fn insert<T>(&mut self, data: &T) where T: Hash,` [src]

`pub fn contains<T>(&self, data: &T) -> bool where T: Hash,` [src]

`impl<H1, H2> Debug for KMBloomFilter<H1, H2> where H1: Hasher + Default, H2: Hasher + Default,` [src]

`pub fn fmt(&self, f: &mut Formatter<'_>) -> Result`[src]

Auto Trait Implementations

`impl<H1, H2> RefUnwindSafe for KMBloomFilter<H1, H2> where H1: RefUnwindSafe, H2: RefUnwindSafe,`

`impl<H1, H2> Send for KMBloomFilter<H1, H2> where H1: Send, H2: Send,`

`impl<H1, H2> Sync for KMBloomFilter<H1, H2> where H1: Sync, H2: Sync,`

`impl<H1, H2> Unpin for KMBloomFilter<H1, H2> where H1: Unpin, H2: Unpin,`

`impl<H1, H2> UnwindSafe for KMBloomFilter<H1, H2> where H1: UnwindSafe, H2: UnwindSafe,`

Blanket Implementations

`impl<T> Any for T where T: 'static + ?Sized,` [src]

`pub fn type_id(&self) -> TypeId`[src]

`impl<T> Borrow<T> for T where T: ?Sized,` [src]

`pub fn borrow(&self) -> &T`[src]

`impl<T> BorrowMut<T> for T where T: ?Sized,` [src]

`pub fn borrow_mut(&mut self) -> &mut T`[src]

`impl<T> From<T> for T`[src]

`pub fn from(t: T) -> T`[src]

`impl<T, U> Into<U> for T where U: From<T>,` [src]

`pub fn into(self) -> U`[src]

`impl<T, U> TryFrom<U> for T where U: Into<T>,` [src]

`type Error = Infallible`

The type returned in the event of a conversion error.

`pub fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>`[src]

`impl<T, U> TryInto<U> for T where U: TryFrom<T>,` [src]

`type Error = <U as TryFrom<T>>::Error`

The type returned in the event of a conversion error.

[−][src]Struct bloom_filter_simple::KMBloomFilter

Examples

Implementations

impl<H1, H2> KMBloomFilter<H1, H2> where H1: Hasher + Default, H2: Hasher + Default, [src]

pub fn new( desired_capacity: usize, desired_false_positive_probability: f64) -> Self[src]

Panics

Examples

pub fn approximate_element_count(&self) -> f64[src]

pub fn approximate_current_false_positive_probability(&self) -> f64[src]

pub fn union(&self, other: &Self) -> Self[src]

Panics

Examples

pub fn intersect(&self, other: &Self) -> Self[src]

Panics

Examples

pub fn eq_configuration(&self, other: &Self) -> bool[src]

Trait Implementations

impl<H1, H2> BloomFilter for KMBloomFilter<H1, H2> where H1: Hasher + Default, H2: Hasher + Default, [src]

pub fn insert<T>(&mut self, data: &T) where T: Hash, [src]

pub fn contains<T>(&self, data: &T) -> bool where T: Hash, [src]

impl<H1, H2> Debug for KMBloomFilter<H1, H2> where H1: Hasher + Default, H2: Hasher + Default, [src]

pub fn fmt(&self, f: &mut Formatter<'_>) -> Result[src]

Auto Trait Implementations

impl<H1, H2> RefUnwindSafe for KMBloomFilter<H1, H2> where H1: RefUnwindSafe, H2: RefUnwindSafe,

impl<H1, H2> Send for KMBloomFilter<H1, H2> where H1: Send, H2: Send,

impl<H1, H2> Sync for KMBloomFilter<H1, H2> where H1: Sync, H2: Sync,

impl<H1, H2> Unpin for KMBloomFilter<H1, H2> where H1: Unpin, H2: Unpin,

impl<H1, H2> UnwindSafe for KMBloomFilter<H1, H2> where H1: UnwindSafe, H2: UnwindSafe,

Blanket Implementations

impl<T> Any for T where T: 'static + ?Sized, [src]

pub fn type_id(&self) -> TypeId[src]

impl<T> Borrow<T> for T where T: ?Sized, [src]

pub fn borrow(&self) -> &T[src]

impl<T> BorrowMut<T> for T where T: ?Sized, [src]

pub fn borrow_mut(&mut self) -> &mut T[src]

impl<T> From<T> for T[src]

pub fn from(t: T) -> T[src]

impl<T, U> Into<U> for T where U: From<T>, [src]

pub fn into(self) -> U[src]

impl<T, U> TryFrom<U> for T where U: Into<T>, [src]

type Error = Infallible

pub fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>[src]

impl<T, U> TryInto<U> for T where U: TryFrom<T>, [src]

type Error = <U as TryFrom<T>>::Error

pub fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>[src]

`impl<H1, H2> KMBloomFilter<H1, H2> where H1: Hasher + Default, H2: Hasher + Default,` [src]

`pub fn new( desired_capacity: usize, desired_false_positive_probability: f64 ) -> Self`[src]

`pub fn approximate_element_count(&self) -> f64`[src]

`pub fn approximate_current_false_positive_probability(&self) -> f64`[src]

`pub fn union(&self, other: &Self) -> Self`[src]

`pub fn intersect(&self, other: &Self) -> Self`[src]

`pub fn eq_configuration(&self, other: &Self) -> bool`[src]

`impl<H1, H2> BloomFilter for KMBloomFilter<H1, H2> where H1: Hasher + Default, H2: Hasher + Default,` [src]

`pub fn insert<T>(&mut self, data: &T) where T: Hash,` [src]

`pub fn contains<T>(&self, data: &T) -> bool where T: Hash,` [src]

`impl<H1, H2> Debug for KMBloomFilter<H1, H2> where H1: Hasher + Default, H2: Hasher + Default,` [src]

`pub fn fmt(&self, f: &mut Formatter<'_>) -> Result`[src]

`impl<H1, H2> RefUnwindSafe for KMBloomFilter<H1, H2> where H1: RefUnwindSafe, H2: RefUnwindSafe,`

`impl<H1, H2> Send for KMBloomFilter<H1, H2> where H1: Send, H2: Send,`

`impl<H1, H2> Sync for KMBloomFilter<H1, H2> where H1: Sync, H2: Sync,`

`impl<H1, H2> Unpin for KMBloomFilter<H1, H2> where H1: Unpin, H2: Unpin,`

`impl<H1, H2> UnwindSafe for KMBloomFilter<H1, H2> where H1: UnwindSafe, H2: UnwindSafe,`

`impl<T> Any for T where T: 'static + ?Sized,` [src]

`pub fn type_id(&self) -> TypeId`[src]

`impl<T> Borrow<T> for T where T: ?Sized,` [src]

`pub fn borrow(&self) -> &T`[src]

`impl<T> BorrowMut<T> for T where T: ?Sized,` [src]

`pub fn borrow_mut(&mut self) -> &mut T`[src]

`impl<T> From<T> for T`[src]

`pub fn from(t: T) -> T`[src]

`impl<T, U> Into<U> for T where U: From<T>,` [src]

`pub fn into(self) -> U`[src]

`impl<T, U> TryFrom<U> for T where U: Into<T>,` [src]

`type Error = Infallible`

`pub fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>`[src]

`impl<T, U> TryInto<U> for T where U: TryFrom<T>,` [src]

`type Error = <U as TryFrom<T>>::Error`

`pub fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>`[src]