1extern crate bigint;
2extern crate bit_vec;
3extern crate fasthash;
4
5use bigint::uint::U512;
6use bit_vec::BitVec;
7use fasthash::murmur3::hash128;
8use std::ops::Add;
9
10pub struct BloomFilter {
14 capacity: usize,
15 bitvec: BitVec,
16 error_rate: f64,
17 num_of_hashfuncs: usize,
18 num_of_elements: usize,
19 dup_check: bool,
20}
21
22#[inline(always)]
25fn nbits(n: usize, p: f64) -> usize {
26 let numerator = n as f64 * p.ln();
27 let denominator = (1.0_f64 / 2.0_f64.powf(2.0_f64.ln())).ln();
28 (numerator / denominator).ceil() as usize
29}
30
31#[inline(always)]
34fn iterations(m: usize, n: usize) -> usize {
35 ((m as f64 / n as f64) * 2.0_f64.ln()).round() as usize
36}
37
38impl BloomFilter {
39 pub fn new(capacity: usize, error_rate: f64, dup_check: bool) -> BloomFilter {
40 if capacity == 0 {
41 panic!("capacity must be greater than zero");
42 }
43 if error_rate <= 0.0 || error_rate > 1.0 {
44 panic!("error_rate must be greater than 0.0 and less than 1.0");
45 }
46 let num_of_bits = nbits(capacity, error_rate);
47 let num_of_hashfuncs = iterations(num_of_bits, capacity);
48 BloomFilter {
49 bitvec: BitVec::from_elem(num_of_bits, false),
50 capacity,
51 error_rate,
52 num_of_hashfuncs,
53 num_of_elements: 0,
54 dup_check,
55 }
56 }
57 pub fn new_with_default_error_rate(capacity:usize, dup_check:bool) -> BloomFilter{
59 if capacity == 0 {
60 panic!("capacity must be greater than zero");
61 }
62 BloomFilter::new(capacity, 0.01, dup_check)
63 }
64
65 pub fn bitvec_len(&self) -> usize {
66 self.bitvec.len()
67 }
68
69 pub fn capacity(&self) -> usize {
70 self.capacity
71 }
72
73 pub fn len(&self) -> usize {
74 self.num_of_elements
75 }
76
77 pub fn error_rate(&self) -> f64 {
78 self.error_rate
79 }
80 pub fn is_empty(&self) -> bool {
81 self.len() == 0
82 }
83
84 pub fn add(&mut self, data: &[u8]) -> Result<bool, &'static str> {
85 if self.num_of_elements == self.capacity {
86 return Err("You are adding to the bloom filter that is already full");
87 }
88 let hash = hash128(data);
89 let hash64_first = (hash & (2_u128.pow(64) - 1)) as u64;
90 let hash64_second = (hash >> 64) as u64;
91 let mut result_hash: U512 = hash64_first.into();
92 let mut exists = true;
93 for value in 0..self.num_of_hashfuncs {
94 let temp: U512 = U512::from(value) * U512::from(hash64_second);
95 result_hash = result_hash.add(temp);
96 let index = result_hash % U512::from(self.bitvec_len());
97 if self.dup_check && self.bitvec.get(index.as_u64() as usize) == Some(false) {
98 exists = false;
99 }
100 self.bitvec.set(index.as_u64() as usize, true);
101 }
102 if self.dup_check && exists {
103 return Ok(false);
104 }
105 self.num_of_elements += 1;
106 return Ok(true);
107 }
108
109 pub fn contains(&self, data: &[u8]) -> bool {
110 let hash = hash128(data);
111 let hash64_first = (hash & (2_u128.pow(64) - 1)) as u64;
112 let hash64_second = (hash >> 64) as u64;
113 let mut result_hash: U512 = hash64_first.into();
114 for value in 0..self.num_of_hashfuncs {
115 let temp: U512 = U512::from(value) * U512::from(hash64_second);
116 result_hash = result_hash.add(temp);
117 let index = result_hash % U512::from(self.bitvec_len());
118 if self.bitvec.get(index.as_u64() as usize) == Some(false) {
119 return false;
120 }
121 }
122 true
123 }
124}
125
126#[cfg(test)]
127mod tests {
128 use crate::BloomFilter;
129
130 #[test]
131 fn test_single_element() {
132 let mut b = BloomFilter::new(20000, 0.01, true);
133 assert_eq!(b.add("Test".as_bytes()).unwrap(), true);
134 assert!(b.contains("Test".as_bytes()));
135 }
136
137 #[test]
138 fn test_multiple_types() {
139 let mut b = BloomFilter::new(20000, 0.01, true);
140 assert_eq!(b.add("Test".as_bytes()).unwrap(), true);
141 assert!(b.contains("Test".as_bytes()));
142 assert_eq!(b.add(&1_u8.to_ne_bytes()).unwrap(), true);
143 assert!(b.contains(&1_u8.to_ne_bytes()));
144 assert_eq!(b.add(&1_i32.to_ne_bytes()).unwrap(), true);
145 assert!(b.contains(&1_i32.to_ne_bytes()));
146 assert_eq!(b.add(&1_i64.to_ne_bytes()).unwrap(), true);
147 assert!(b.contains(&1_i64.to_ne_bytes()));
148 assert_eq!(b.add(&1.0_f32.to_bits().to_be_bytes()).unwrap(), true);
149 assert!(b.contains(&1.0_f32.to_bits().to_be_bytes()));
150 assert_eq!(b.add(&12131231231231321123.0_f64.to_bits().to_be_bytes()).unwrap(), true);
151 assert!(b.contains(&12131231231231321123.0_f64.to_bits().to_be_bytes()));
152 assert_eq!(b.add(&1.33333333333333333_f32.to_bits().to_be_bytes()).unwrap(), true);
153 assert!(b.contains(&1.33333333333333333_f32.to_bits().to_be_bytes()));
154 }
155
156 #[test]
157 #[should_panic]
158 fn test_empty_bloom_zero_capacity_filter() {
159 let _b = BloomFilter::new(0, 0.01, true);
160 }
161 #[test]
162 #[should_panic]
163 fn test_empty_bloom_zero_error_rate_filter() {
164 let _b = BloomFilter::new(10, 0.000, true);
165 }
166
167 #[test]
168 #[should_panic]
169 fn test_empty_bloom_negative_error_rate_filter() {
170 let _b = BloomFilter::new(10, -0.010, true);
171 }
172
173 #[test]
174 fn test_full_bloom_filter() {
175 let mut b = BloomFilter::new(10, 0.01, true);
176 let elements = vec![
178 "April is the cruellest month, breeding",
179 "Lilacs out of the dead land, mixing",
180 "Memory and desire, stirring",
181 "Dull roots with spring rain.",
182 "Winter kept us warm, covering",
183 "Earth in forgetful snow, feeding",
184 "A little life with dried tubers.",
185 "Summer surprised us, coming over the Starnbergersee",
186 "With a shower of rain; we stopped in the colonnade,",
187 "And went on in sunlight, into the Hofgarten,",
188 "And drank coffee, and talked for an hour.",
189 ];
190 for element in &elements[..9] {
191 assert_eq!(b.add(element.as_bytes()).unwrap(), true);
192 }
193 assert!(
194 b.add((&elements[10]).as_ref()).unwrap(),
195 "You are adding to the bloom filter that is full"
196 );
197 }
198
199 #[test]
200 fn test_multiple_elements() {
201 let mut b = BloomFilter::new(20000, 0.01, true);
202 let elements = vec![
203 "Bin gar keine Russin, stamm’ aus Litauen, echt deutsch.",
204 "And when we were children, staying at the arch-duke’s,",
205 "My cousin’s, he took me out on a sled,",
206 "And I was frightened. He said, Marie,",
207 "Marie, hold on tight. And down we went.",
208 "In the mountains, there you feel free.",
209 "I read, much of the night, and go south in the winter.",
210 ];
211 for element in &elements {
212 b.add(element.as_bytes()).unwrap();
213 }
214 for element in &elements {
215 assert!(b.contains(element.as_bytes()));
216 }
217 assert_eq!(
218 b.contains("What are the roots that clutch, what branches grow".as_bytes()),
219 false
220 );
221 assert_eq!(elements.len(), b.len())
222 }
223 #[test]
224 fn test_multiple_duplicate_elements() {
225 let mut b = BloomFilter::new(20000, 0.01, true);
226 let elements = vec![
227 "Out of this stony rubbish? Son of man,",
228 "Out of this stony rubbish? Son of man,",
229 "You cannot say, or guess, for you know only",
230 "You cannot say, or guess, for you know only",
231 ];
232 assert_eq!(b.add(elements[0].as_bytes()).unwrap(), true);
233 assert_eq!(b.len(), 1);
234 assert_eq!(b.add(elements[1].as_bytes()).unwrap(), false);
235 assert_eq!(b.len(), 1);
236 assert_eq!(b.add(elements[2].as_bytes()).unwrap(), true);
237 assert_eq!(b.len(), 2);
238 assert_eq!(b.add(elements[3].as_bytes()).unwrap(), false);
239 assert_eq!(b.len(), 2);
240 }
241
242 #[test]
243 fn test_multiple_duplicate_elements_with_dup_check_false() {
244 let mut b = BloomFilter::new(20000, 0.01, false);
245 let elements = vec![
246 "A heap of broken images, where the sun beats,",
247 "A heap of broken images, where the sun beats,",
248 "And the dead tree gives no shelter, the cricket no relief,",
249 "And the dead tree gives no shelter, the cricket no relief,",
250 ];
251 assert_eq!(b.add(elements[0].as_bytes()).unwrap(), true);
252 assert_eq!(b.len(), 1);
253 assert_eq!(b.add(elements[1].as_bytes()).unwrap(), true);
254 assert_eq!(b.len(), 2);
255 assert_eq!(b.add(elements[2].as_bytes()).unwrap(), true);
256 assert_eq!(b.len(), 3);
257 assert_eq!(b.add(elements[3].as_bytes()).unwrap(), true);
258 assert_eq!(b.len(), 4);
259 for i in vec![
260 "A heap of broken images, where the sun beats,",
261 "And the dead tree gives no shelter, the cricket no relief,",
262 ] {
263 assert!(b.contains(i.as_bytes()))
264 }
265 }
266
267 #[test]
268 fn test_new_with_default_error_rate() {
269 let mut b = BloomFilter::new_with_default_error_rate(20000, true);
270 assert_eq!(b.add("Test".as_bytes()).unwrap(), true);
271 assert!(b.contains("Test".as_bytes()));
272 }
273}