aabel_bloom_rs/
lib.rs

1//! A crate which exposes [`BloomFilter`], an implementation of the [bloom filter]() algorithm.
2//!
3//! # Example
4//!
5//!```
6//! use aabel_bloom_rs::BloomFilter;
7//! use aabel_multihash_rs::{BuildHasherExt, BuildPairHasher};
8//! use std::hash::{BuildHasher, Hash};
9//!
10//! let keys1 = (0, 0);
11//! let keys2 = (1, 1);
12//! let builder = BuildPairHasher::new_with_keys(keys1, keys2);
13//!
14//! let mut filter = BloomFilter::<&str, _>::new(builder);
15//!
16//! // The filter is empty
17//! let item = "Hello world!";
18//! let contains = filter.contains(item);
19//! assert!(!contains);
20//!
21//! // Insert several items in the filter.
22//! filter.insert(item);
23//! filter.insert("Tessting testing");
24//! filter.insert("Rust rocks");
25//! filter.insert("In Rust we trust");
26//!
27//! // Check if the item is in the filter
28//! let contains = filter.contains(item);
29//! assert!(contains)
30//!```
31
32use aabel_multihash_rs::{BuildHasherExt, Hash64, HasherExt};
33use bitvec::array::BitArray;
34use std::{
35    borrow::Borrow,
36    hash::{BuildHasher, Hash},
37    marker::PhantomData,
38};
39
40/// Implements the [bloom filter](https://en.wikipedia.org/wiki/Bloom_filter).
41/// [`B`] is an instance of [`BuildHasherExt`] trait which helps generating multiple hash values for any given item [`T`].
42/// The [`K`] generic argument represents the number of usize cells in the inner array.
43/// The [`H`] generic argument represents the number of hash values computed for each item.
44///
45/// # Example
46pub struct BloomFilter<T, B, const K: usize = 100, const H: usize = 10>
47where
48    T: ?Sized,
49{
50    builder: B,
51    bits: BitArray<[usize; K]>,
52    _marker: PhantomData<T>,
53}
54
55impl<T, B, const K: usize, const H: usize> BloomFilter<T, B, K, H>
56where
57    T: ?Sized,
58    B: BuildHasher + BuildHasherExt,
59{
60    /// Creates a new [`BloomFilter`] instance based on a given [`BuildHasherExt`] instance and a give number of hash values for each item.
61    pub fn new(builder: B) -> Self {
62        Self {
63            builder,
64            bits: BitArray::ZERO,
65            _marker: PhantomData,
66        }
67    }
68}
69
70impl<T, B, const K: usize, const H: usize> BloomFilter<T, B, K, H>
71where
72    B: BuildHasher + BuildHasherExt,
73    <B as BuildHasher>::Hasher: HasherExt,
74    T: Hash + ?Sized,
75{
76    /// Inserts in the filter a new item.
77    ///
78    /// # Example
79    ///```
80    /// use aabel_bloom_rs::*;
81    /// use aabel_multihash_rs::*;
82    ///
83    /// // Create the hasher builder
84    /// let keys1 = (0, 0);
85    /// let keys2 = (1, 1);
86    /// let builder = BuildPairHasher::new_with_keys(keys1, keys2);
87    ///
88    /// // Create the bloom filter
89    /// let mut filter = BloomFilter::<[u8], _>::new(builder);
90    ///
91    /// // Insert in the filter
92    /// filter.insert(&[1u8, 2, 3]);
93    ///```
94    pub fn insert<U>(&mut self, item: &U)
95    where
96        T: Borrow<U>,
97        U: Hash + ?Sized,
98    {
99        let set_bit_for_hash = |hash: Hash64| {
100            let hash: u64 = hash.into();
101            let index = hash as usize % self.bits.len();
102            self.bits.set(index, true);
103        };
104
105        self.builder
106            .hashes_one(item)
107            .take(H)
108            .for_each(set_bit_for_hash);
109    }
110
111    /// Checks if a given item is present in the filter.
112    ///
113    /// # Example
114    ///
115    ///```
116    /// use aabel_bloom_rs::*;
117    /// use aabel_multihash_rs::*;
118    ///
119    /// // Create the hasher builder
120    /// let keys1 = (0, 0);
121    /// let keys2 = (1, 1);
122    /// let builder = BuildPairHasher::new_with_keys(keys1, keys2);
123    ///
124    /// // Create the bloom filter
125    /// let mut filter = BloomFilter::<[u8], _>::new(builder);
126    ///
127    /// // Insert and contains operations on the filter
128    /// filter.insert(&[1u8, 2, 3]);
129    /// let res = filter.contains(&[1u8, 2, 3].as_slice());
130    /// assert!(res)
131    ///
132    ///```
133    pub fn contains<U>(&mut self, item: &U) -> bool
134    where
135        T: Borrow<U>,
136        U: Hash + ?Sized,
137    {
138        let get_bit_for_hash = |hash: Hash64| {
139            let hash: u64 = hash.into();
140            let index = hash as usize % self.bits.len();
141            self.bits[index]
142        };
143
144        self.builder.hashes_one(item).take(H).all(get_bit_for_hash)
145    }
146}
147
148#[cfg(test)]
149mod tests {
150    use super::*;
151    use aabel_multihash_rs::BuildPairHasher;
152
153    #[test]
154    fn insert_contains() {
155        let keys1 = (0, 0);
156        let keys2 = (1, 1);
157        let builder = BuildPairHasher::new_with_keys(keys1, keys2);
158
159        let mut filter = BloomFilter::<&str, _>::new(builder);
160
161        // Insert an item in the bloom filter.
162        let item = "Hello world!";
163        filter.insert(item);
164
165        // Check if the item is in the filter
166        let res = filter.contains(item);
167        assert!(res)
168    }
169}