1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
// A Rust BloomFilter implementation.
// Copywrite (c) 2016 Nick Lanham

// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of the
// License, or (at your option) any later version.

// This program is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// General Public License for more details.

// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
// 02110-1301, USA.


//! An implementation of various Approximate Set Membership structures
//! in Rust.  Currently included are a standard Bloom Filter, and the
//! simplest kind of Counting Bloom Filter.
//!
//! # Usage
//!
//! This crate is [on crates.io](https://crates.io/crates/rand) and
//! can be used by adding `bloom` to the dependencies in your
//! project's `Cargo.toml`.
//!
//! ```toml
//! [dependencies]
//! bloom = "0.2.0"
//! ```
//!
//! add this to your crate root:
//!
//! ```rust
//! extern crate bloom;
//! ```
//!
//! # Bloom Filters
//!
//! A Bloom Filter is an Approximate Set Membership structure, which
//! means it can track a set of items and check if an item is a member
//! of the set it is tracking.  It is able to do this using a much
//! smaller amount of memory than storing the actual items, at the
//! cost of an occasionally indicating that an item is in the set even
//! though it is not.  This occurence is called a "False Positive".  A
//! traditional Bloom Filter will never have a "False Negative"
//! however, which would be indicating that an item is *not* in the
//! set, when in fact it is.  The frequency of false positives can be
//! preciecly bounded by setting the size of the filter, and is called
//! the False Positive Rate.  Their small memory footprint and absence
//! of false negatives makes BloomFilters suitable for many
//! applications.
//!
//! # Example Usage
//!
//! ```rust
//! use bloom::{ASMS,BloomFilter};
//!
//! let expected_num_items = 1000;
//!
//! // out of 100 items that are not inserted, expect 1 to return true for contain
//! let false_positive_rate = 0.01;
//!
//! let mut filter = BloomFilter::with_rate(false_positive_rate,expected_num_items);
//! filter.insert(&1);
//! filter.contains(&1); /* true */
//! filter.contains(&2); /* probably false */
//! ```
//!
//! # Counting Bloom Filters
//!
//! Counting filters allow removal from a Bloom filter without
//! recreating the filter afresh. A counting filter uses an n-bit
//! counter where a standard Bloom Filter uses a single bit.  Counting
//! filters can also provide an upper bound on the number of times a
//! particular element has been inserted into the filter.  In general
//! 4 bits per element is considered a good size.  This will cause the
//! filter to use 4 times as much memory as compared to standard Bloom
//! Filter.
//!
//! # Example Usage
//!
//! ```rust
//! use bloom::{ASMS,CountingBloomFilter};
//! // Create a counting filter that uses 4 bits per element and has a false positive rate
//! // of 0.01 when 100 items have been inserted
//! let mut cbf:CountingBloomFilter = CountingBloomFilter::with_rate(4,0.01,100);
//! cbf.insert(&1);
//! cbf.insert(&2);
//! assert_eq!(cbf.estimate_count(&1),1);
//! assert_eq!(cbf.estimate_count(&2),1);
//! assert_eq!(cbf.insert_get_count(&1),1);
//! assert_eq!(cbf.estimate_count(&1),2);
//! assert_eq!(cbf.remove(&1),2);
//! assert_eq!(cbf.estimate_count(&1),1);
//! ```


#![crate_name="bloom"]
#![crate_type = "rlib"]

#![cfg_attr(feature = "do-bench", feature(test))]

extern crate core;
extern crate bit_vec;
use std::hash::Hash;

mod hashing;

pub mod bloom;
pub use bloom::{BloomFilter,optimal_num_hashes,needed_bits};

pub mod counting;
pub use counting::CountingBloomFilter;

pub mod valuevec;
pub use valuevec::ValueVec;

/// Stanard filter functions
pub trait ASMS {
    fn insert<T: Hash>(& mut self,item: &T) -> bool;
    fn contains<T: Hash>(&self, item: &T) -> bool;
    fn clear(&mut self);
}

/// Filters that implement this trait can be intersected with filters
/// of the same type to produce a filter that contains the
/// items that have been inserted into *both* filters.
///
/// Both filters MUST be the same size and be using the same hash
/// functions for this to work.  Will panic if the filters are not the
/// same size, but will simply produce incorrect (meaningless) results
/// if the filters are using different hash functions.
pub trait Intersectable {
    fn intersect(&mut self, other: &Self) -> bool;
}

/// Filters that implement this trait can be unioned with filters
/// of the same type to produce a filter that contains the
/// items that have been inserted into *either* filter.
///
/// Both filters MUST be the same size and be using the same hash
/// functions for this to work.  Will panic if the filters are not the
/// same size, but will simply produce incorrect (meaningless) results
/// if the filters are using different hash functions.
pub trait Unionable {
    fn union(&mut self, other: &Self) -> bool;
}

/// Filters than are Combineable can be unioned and intersected
pub trait Combineable: Intersectable + Unionable {}
impl<T> Combineable for T where T: Intersectable + Unionable {}