aabel_bloom_rs/lib.rs
1//! A crate which exposes [`BloomFilter`], an implementation of the [bloom filter]() algorithm.
2//!
3//! # Example
4//!
5//!```
6//! use aabel_bloom_rs::BloomFilter;
7//! use aabel_multihash_rs::{BuildHasherExt, BuildPairHasher};
8//! use std::hash::{BuildHasher, Hash};
9//!
10//! let keys1 = (0, 0);
11//! let keys2 = (1, 1);
12//! let builder = BuildPairHasher::new_with_keys(keys1, keys2);
13//!
14//! let mut filter = BloomFilter::<&str, _>::new(builder);
15//!
16//! // The filter is empty
17//! let item = "Hello world!";
18//! let contains = filter.contains(item);
19//! assert!(!contains);
20//!
21//! // Insert several items in the filter.
22//! filter.insert(item);
23//! filter.insert("Tessting testing");
24//! filter.insert("Rust rocks");
25//! filter.insert("In Rust we trust");
26//!
27//! // Check if the item is in the filter
28//! let contains = filter.contains(item);
29//! assert!(contains)
30//!```
31
32use aabel_multihash_rs::{BuildHasherExt, Hash64, HasherExt};
33use bitvec::array::BitArray;
34use std::{
35 borrow::Borrow,
36 hash::{BuildHasher, Hash},
37 marker::PhantomData,
38};
39
40/// Implements the [bloom filter](https://en.wikipedia.org/wiki/Bloom_filter).
41/// [`B`] is an instance of [`BuildHasherExt`] trait which helps generating multiple hash values for any given item [`T`].
42/// The [`K`] generic argument represents the number of usize cells in the inner array.
43/// The [`H`] generic argument represents the number of hash values computed for each item.
44///
45/// # Example
46pub struct BloomFilter<T, B, const K: usize = 100, const H: usize = 10>
47where
48 T: ?Sized,
49{
50 builder: B,
51 bits: BitArray<[usize; K]>,
52 _marker: PhantomData<T>,
53}
54
55impl<T, B, const K: usize, const H: usize> BloomFilter<T, B, K, H>
56where
57 T: ?Sized,
58 B: BuildHasher + BuildHasherExt,
59{
60 /// Creates a new [`BloomFilter`] instance based on a given [`BuildHasherExt`] instance and a give number of hash values for each item.
61 pub fn new(builder: B) -> Self {
62 Self {
63 builder,
64 bits: BitArray::ZERO,
65 _marker: PhantomData,
66 }
67 }
68}
69
70impl<T, B, const K: usize, const H: usize> BloomFilter<T, B, K, H>
71where
72 B: BuildHasher + BuildHasherExt,
73 <B as BuildHasher>::Hasher: HasherExt,
74 T: Hash + ?Sized,
75{
76 /// Inserts in the filter a new item.
77 ///
78 /// # Example
79 ///```
80 /// use aabel_bloom_rs::*;
81 /// use aabel_multihash_rs::*;
82 ///
83 /// // Create the hasher builder
84 /// let keys1 = (0, 0);
85 /// let keys2 = (1, 1);
86 /// let builder = BuildPairHasher::new_with_keys(keys1, keys2);
87 ///
88 /// // Create the bloom filter
89 /// let mut filter = BloomFilter::<[u8], _>::new(builder);
90 ///
91 /// // Insert in the filter
92 /// filter.insert(&[1u8, 2, 3]);
93 ///```
94 pub fn insert<U>(&mut self, item: &U)
95 where
96 T: Borrow<U>,
97 U: Hash + ?Sized,
98 {
99 let set_bit_for_hash = |hash: Hash64| {
100 let hash: u64 = hash.into();
101 let index = hash as usize % self.bits.len();
102 self.bits.set(index, true);
103 };
104
105 self.builder
106 .hashes_one(item)
107 .take(H)
108 .for_each(set_bit_for_hash);
109 }
110
111 /// Checks if a given item is present in the filter.
112 ///
113 /// # Example
114 ///
115 ///```
116 /// use aabel_bloom_rs::*;
117 /// use aabel_multihash_rs::*;
118 ///
119 /// // Create the hasher builder
120 /// let keys1 = (0, 0);
121 /// let keys2 = (1, 1);
122 /// let builder = BuildPairHasher::new_with_keys(keys1, keys2);
123 ///
124 /// // Create the bloom filter
125 /// let mut filter = BloomFilter::<[u8], _>::new(builder);
126 ///
127 /// // Insert and contains operations on the filter
128 /// filter.insert(&[1u8, 2, 3]);
129 /// let res = filter.contains(&[1u8, 2, 3].as_slice());
130 /// assert!(res)
131 ///
132 ///```
133 pub fn contains<U>(&mut self, item: &U) -> bool
134 where
135 T: Borrow<U>,
136 U: Hash + ?Sized,
137 {
138 let get_bit_for_hash = |hash: Hash64| {
139 let hash: u64 = hash.into();
140 let index = hash as usize % self.bits.len();
141 self.bits[index]
142 };
143
144 self.builder.hashes_one(item).take(H).all(get_bit_for_hash)
145 }
146}
147
148#[cfg(test)]
149mod tests {
150 use super::*;
151 use aabel_multihash_rs::BuildPairHasher;
152
153 #[test]
154 fn insert_contains() {
155 let keys1 = (0, 0);
156 let keys2 = (1, 1);
157 let builder = BuildPairHasher::new_with_keys(keys1, keys2);
158
159 let mut filter = BloomFilter::<&str, _>::new(builder);
160
161 // Insert an item in the bloom filter.
162 let item = "Hello world!";
163 filter.insert(item);
164
165 // Check if the item is in the filter
166 let res = filter.contains(item);
167 assert!(res)
168 }
169}