bloomfilter_rs/
filter.rs

1use crate::random_sequence;
2use core::iter::zip;
3use fasthash_fork;
4use rand::Rng;
5use std::collections::HashSet;
6use std::time;
7/// Bloom Filter
8pub struct Bloomfilter {
9    /// Storing the 1s if activated by a hash funtion, zero otherwise.
10    bitmap: Vec<u8>,
11    /// The family of hash function thats will be used.
12    seeds_for_hash_function: Vec<u64>,
13    /// The hash function that will be used
14    hash_function: fn(&[u8], u64) -> u64,
15}
16
17impl Bloomfilter {
18    /// Get a new bloom filter with a bitmap of length `n_bits` and `n_hash_functions`
19    /// hash functions.
20    ///
21    /// # Arguments
22    ///
23    /// * `n_bits`: How many bits should the bitmap of the bloomfilter have? More bits make
24    ///     collision (False Positives)  more unlikely.
25    ///
26    /// * `n_hash_functions`: How many hash functions should be used?
27    /// # Examples
28    ///
29    /// ```
30    /// use bloomfilter_rs::filter;
31    ///
32    /// let bloom_filter = filter::Bloomfilter::new(
33    ///     10,
34    ///     20
35    /// );
36    /// ```
37    pub fn new(n_bits: usize, n_hash_functions: usize) -> Self {
38        Bloomfilter {
39            bitmap: vec![0; n_bits],
40            seeds_for_hash_function: get_random_seeds(n_hash_functions),
41            hash_function: |element, seed| fasthash_fork::xx::hash64_with_seed(element, seed),
42        }
43    }
44    /// Generate a bloomfilter with non-random seeds.
45    ///
46    /// # Arguments
47    ///
48    /// * `n_bits`: How many bits should the bitmap of the bloomfilter have? More bits make
49    ///     collision (False Positives)  more unlikely.
50    ///
51    /// * `seeds`: Provide custom seeds to parameterize the hash functions. Given a different seed, the hash function
52    /// would hash the same objects to different hashes. You should not provide the same seeds twice.
53    ///
54    /// # Examples
55    ///
56    /// ```
57    /// use bloomfilter_rs::filter;
58    ///
59    /// let bloom_filter = filter::Bloomfilter::new_with_seeds(
60    ///     10,
61    ///     vec![1, 10, 100],
62    /// );
63    /// ```
64    pub fn new_with_seeds(n_bits: usize, seeds: Vec<u64>) -> Self {
65        Bloomfilter {
66            bitmap: vec![0; n_bits],
67            seeds_for_hash_function: seeds,
68            hash_function: |element, seed| fasthash_fork::xx::hash64_with_seed(element, seed),
69        }
70    }
71    /// Generate a bloomfilter with non-random seeds and a custom hash function.
72    ///
73    /// # Arguments
74    ///
75    /// * `n_bits`: How many bits should the bitmap of the bloomfilter have? More bits make
76    ///     collision (False Positives)  more unlikely.
77    ///
78    /// * `seeds`: Provide custom seeds to parameterize the hash functions. Given a different seed, the hash function
79    /// would hash the same objects to different hashes. You should not provide the same seeds twice.
80    ///
81    /// * `hash_function`: Specify a custom hash function. Hash functions should take in the bytes of the data and a seed to
82    /// parmetrize the behaviour of the hash function. Given different hash values the function should show different behaviour.
83    ///
84    /// # Examples
85    ///
86    /// ```
87    /// use bloomfilter_rs::filter;
88    ///
89    /// let bloom_filter = filter::Bloomfilter::new_with_seeds_and_custom_hash_function(
90    ///     10,
91    ///     vec![1, 10, 100],
92    ///     |bytes, seed| -> u64
93    ///         {(bytes.iter().map(|elem| *elem as u64).sum::<u64>() + seed).into()}
94    /// );
95    /// ```
96    pub fn new_with_seeds_and_custom_hash_function(
97        n_bits: usize,
98        seeds: Vec<u64>,
99        hash_function: fn(&[u8], u64) -> u64,
100    ) -> Self {
101        Bloomfilter {
102            bitmap: vec![0; n_bits],
103            seeds_for_hash_function: seeds,
104            hash_function,
105        }
106    }
107    /// Create a random bloomfilter given its supposed characteristics.
108    ///
109    /// Give the number of expected entries and the acceptable false positive rates, we can create a
110    /// hashfilter that will have the expected characteristic. If we use more entries then expected, the
111    /// false positive rate might decrease.
112    ///
113    /// # Arguments:
114    /// * expected_nubmber_of_entries: How many entries do you expect in the hash filter?
115    /// * acceptable_false_positive_rate: What false positive rate is acceptabel for your use-case?
116    ///
117    ///
118    /// # Examples
119    ///
120    /// ```
121    /// use bloomfilter_rs::filter;
122    ///
123    /// let my_hash_filter = filter::Bloomfilter::from_specification(10, 0.5);
124    /// ```
125    pub fn from_specification(
126        expected_number_of_entries: usize,
127        acceptable_false_positive_rate: f64,
128    ) -> Self {
129        // Formulas taken from https://en.wikipedia.org/wiki/Bloom_filter#Optimal_number_of_hash_functions.
130        let optimal_number_of_bits = (-(expected_number_of_entries as f64
131            * f64::ln(acceptable_false_positive_rate)))
132            / (f64::powi(f64::ln(2.0), 2));
133        let optimal_number_hash_functions = -f64::ln(acceptable_false_positive_rate) / f64::ln(2.0);
134        Bloomfilter::new(
135            optimal_number_of_bits.ceil() as usize,
136            optimal_number_hash_functions.floor() as usize,
137        )
138    }
139    /// How many bits does the bitmap of the filter have?
140    /// # Examples
141    ///
142    /// ```
143    /// use bloomfilter_rs::filter;
144    ///
145    /// let bloom_filter = filter::Bloomfilter::new(
146    ///     10,
147    ///     20
148    /// );
149    /// println!("{}", bloom_filter.number_of_bits())
150    /// ```
151
152    pub fn number_of_bits(&self) -> usize {
153        self.bitmap.len()
154    }
155    /// Insert an object into the bloomfilter. The object should be a string.
156    ///
157    /// If you map higher-level objects (struct ect.) to string, make sure that the representation does exactly
158    /// identify each object (e.g. two different objects cannot map to the same string representation).
159    ///
160    /// # Arguments
161    ///
162    /// * `object`: The object to insert.
163    ///
164    /// # Examples
165    ///
166    /// ```
167    /// use bloomfilter_rs::filter;
168    ///
169    /// let mut bloom_filter = filter::Bloomfilter::new(
170    ///     10,
171    ///     20,
172    /// );
173    /// bloom_filter.insert("To be inserted");
174    /// ```
175
176    pub fn insert(&mut self, object_as_string: &str) {
177        let n_bits = self.number_of_bits();
178        for seed in &self.seeds_for_hash_function {
179            self.bitmap[bytes_to_position_in_bitmap(
180                *seed,
181                object_as_string.as_bytes(),
182                n_bits,
183                self.hash_function,
184            )] = 1;
185        }
186    }
187    /// Insert an object as bytes into the bloomfilter.
188    ///
189    /// # Arguments
190    ///
191    /// * `object_as_bytes`: The object to insert.
192    ///
193    /// # Examples
194    ///
195    /// ```
196    /// use bloomfilter_rs::filter;
197    ///
198    /// let mut bloom_filter = filter::Bloomfilter::new(
199    ///     10,
200    ///     20,
201    /// );
202    /// bloom_filter.insert_object_as_bytes("To be inserted".as_bytes());
203    /// ```
204    pub fn insert_object_as_bytes(&mut self, object_as_bytes: &[u8]) {
205        let n_bits = self.number_of_bits();
206        for seed in &self.seeds_for_hash_function {
207            self.bitmap
208                [bytes_to_position_in_bitmap(*seed, object_as_bytes, n_bits, self.hash_function)] =
209                1;
210        }
211    }
212    /// Test that an object is in the hash filter. The objects needs to be string.
213    ///
214    /// The bloomfilter is probabilistic in nature. False negatives cannot occur (if `bloomfilter.contains`
215    /// returns false, that will always be true), but false positives can occur.
216    ///
217    /// # Arguments
218    ///
219    /// * `object_as_string`: The object to check if it is contained in the bloomfilter.
220    ///
221    /// # Examples
222    ///
223    /// ```
224    /// use bloomfilter_rs::filter;
225    ///
226    /// let mut bloom_filter = filter::Bloomfilter::new(
227    ///     10,
228    ///     20,
229    /// );
230    /// bloom_filter.contains("To be inserted");
231    /// ```
232    pub fn contains(&self, object_as_string: &str) -> bool {
233        let mut bit_map_for_elem: Vec<u8> = vec![0; self.number_of_bits()];
234        for seed in &self.seeds_for_hash_function {
235            bit_map_for_elem[bytes_to_position_in_bitmap(
236                *seed,
237                object_as_string.as_bytes(),
238                self.number_of_bits(),
239                self.hash_function,
240            )] = 1
241        }
242
243        all_elements_bitmap_a_also_in_b(&self.bitmap, &bit_map_for_elem).unwrap()
244    }
245    /// Test that an object as byte-array is in the hash filter.
246    ///
247    /// # Arguments
248    ///
249    /// * `object_as_bytes`: The object to check if it is contained in the bloomfilter.
250    ///
251    /// # Examples
252    ///
253    /// ```
254    /// use bloomfilter_rs::filter;
255    ///
256    /// let mut bloom_filter = filter::Bloomfilter::new(
257    ///     10,
258    ///     20,
259    /// );
260    /// bloom_filter.contains_object_as_bytes("To be inserted".as_bytes());
261    /// ```
262    pub fn contains_object_as_bytes(&self, object_as_bytes: &[u8]) -> bool {
263        let mut bit_map_for_elem: Vec<u8> = vec![0; self.number_of_bits()];
264        for seed in &self.seeds_for_hash_function {
265            bit_map_for_elem[bytes_to_position_in_bitmap(
266                *seed,
267                object_as_bytes,
268                self.number_of_bits(),
269                self.hash_function,
270            )] = 1
271        }
272
273        all_elements_bitmap_a_also_in_b(&self.bitmap, &bit_map_for_elem).unwrap()
274    }
275}
276
277// Utilities used for the bloomfilter-implementation
278/// Generate seeds that are used within the bloom filter
279/// to parameterize hash-functions be have different behaviour.
280/// # Arguments
281///
282/// * `numbers of seeds` - How many seeds should be generated? Should be the same number of needed hash
283/// functions in the bloom filter.
284///
285fn get_random_seeds(number_of_seeds: usize) -> Vec<u64> {
286    (0..number_of_seeds)
287        .map(|_| rand::thread_rng().gen())
288        .collect()
289}
290/// The if bitmap `contains_all` contains all entries that are in `contains_some`
291/// # Arguments
292///
293/// * `contains_all` - Bitmap that should contain all entries.
294/// * `contains_some` - Bitmap that should contain some entries, but only entries from `contains_all`.
295///
296fn all_elements_bitmap_a_also_in_b(
297    contains_all: &Vec<u8>,
298    contains_some: &Vec<u8>,
299) -> Option<bool> {
300    if contains_all.len() != contains_some.len() {
301        return None;
302    }
303    for (elem_in_contains_all, elem_in_contains_some) in zip(contains_all, contains_some) {
304        if *elem_in_contains_all == 0 && *elem_in_contains_some == 1 {
305            return Some(false);
306        }
307    }
308    Some(true)
309}
310/// Given an object of generic type (needs to implement `as_bytes`) hash the elem and map it to a bitmap
311/// of length `number_of_bits`.
312///
313/// # Arguments:
314/// * `seed`: Seed that should be used for the hash function.
315/// * `bytes`: Bytes that should be added to the bloomfilter.
316/// * `number_of_bits`: The number of bits in the bitmap.
317/// * `hash_function`: The hash function that should be used for the mapping. It is assumed to take a reference to a bytes
318/// array (the data that should be hashed) and a seed.
319fn bytes_to_position_in_bitmap(
320    seed: u64,
321    bytes: &[u8],
322    number_of_bits: usize,
323    hash_function: fn(&[u8], u64) -> u64,
324) -> usize {
325    let elem_as_hash = hash_function(bytes, seed);
326    (elem_as_hash % number_of_bits as u64) as usize
327}
328/// This function computes the actual false_positive rate for a bloomfilter.
329/// The formula is taken from `https://en.wikipedia.org/wiki/Bloom_filter#Probability_of_false_positives`
330pub fn compute_false_positive_rate_of_bloomfilter(
331    n_hash_functions: usize,
332    n_bits_in_bitmap: usize,
333    number_of_expected_entries: usize,
334) -> f64 {
335    let inner_most_bracket = 1.0 as f64 - ((1.0 as f64) / (n_bits_in_bitmap as f64));
336    let outer_bracket = 1.0 as f64
337        - f64::powf(
338            inner_most_bracket,
339            (n_hash_functions * number_of_expected_entries) as f64,
340        );
341    f64::powf(outer_bracket, n_hash_functions as f64)
342}
343/// Create a hash-filter given the specification of the function and test
344/// its false positives, false negatives rate and time to insert object /
345/// test that objects are in there.
346///
347/// # Arguments:
348/// * `n_samples_in_bloom_filter`: The number of samples that will be added to the bloomfilter.
349/// * `n_samples_not_in_bloom_filter`: The number of samples that are definitly not in the bloomfilter but
350/// that are used for testing if they the bloomfilter reports them as being in the filter.
351/// * `n_bits_of_bloomfilter`: The number of bits that the bloomfilter has.
352/// * `n_hash_functions_in_bloomfilter`: The number of hash functions that are used in the bloomfilter.
353///
354/// Returns:
355/// * (false_positive_rate, false_negative_rate, insertion_time_per_1000_entries, contains_time_per_1000_entry, expected_false_positive_rate)
356pub fn compute_mistakes_and_execution_time_bloomfilter(
357    n_samples_in_bloom_filter: usize,
358    n_samples_not_in_bloom_filter: usize,
359    n_bits_of_bloomfilter: usize,
360    n_hash_functions_in_bloomfilter: usize,
361) -> (f64, f64, u64, u64, f64) {
362    // Sampling 1000 sequence with 20 elements that will be added to bloom filer
363    let mut sequences_in_bloom_filter: HashSet<Vec<i32>> =
364        HashSet::with_capacity(n_samples_in_bloom_filter);
365    while sequences_in_bloom_filter.len() < n_samples_in_bloom_filter {
366        sequences_in_bloom_filter.insert(random_sequence::random_sequence_from_range(0..100, 20));
367    }
368
369    let mut bloom_filter = Bloomfilter::new(n_bits_of_bloomfilter, n_hash_functions_in_bloomfilter);
370
371    let before = time::Instant::now();
372    for sequence in &sequences_in_bloom_filter {
373        bloom_filter.insert(format!("{:?}", sequence).as_ref())
374    }
375    let total_insertion_time = duration_to_micro_seconds(before.elapsed());
376
377    // Test false negatives rate
378    let mut false_negatives = 0.0;
379    for sequence in &sequences_in_bloom_filter {
380        if !bloom_filter.contains(format!("{:?}", sequence).as_ref()) {
381            false_negatives += 1.0;
382        }
383    }
384
385    // Test false positives rate.
386    let mut false_positives = 0.0;
387    let mut total_contains_time = 0;
388    for _ in 0..n_samples_not_in_bloom_filter {
389        let new_sequence = random_sequence::random_sequence_from_range(0..100, 20);
390        if !sequences_in_bloom_filter.contains(&new_sequence) {
391            let before = time::Instant::now();
392            if bloom_filter.contains(format!("{:?}", new_sequence).as_ref()) {
393                false_positives += 1.0;
394            }
395            total_contains_time += duration_to_micro_seconds(before.elapsed())
396        }
397    }
398    let total_contains_time = total_contains_time;
399
400    (
401        false_positives / n_samples_not_in_bloom_filter as f64,
402        false_negatives / n_samples_in_bloom_filter as f64,
403        total_insertion_time / 1000,
404        total_contains_time / 1000,
405        compute_false_positive_rate_of_bloomfilter(
406            n_hash_functions_in_bloomfilter,
407            n_bits_of_bloomfilter,
408            n_samples_in_bloom_filter,
409        ),
410    )
411}
412/// Repeate the `compute_mistakes_and_execution_time_bloomfilter` and average the
413/// individual result to get more robust estimates.
414pub fn repeat_compute_mistakes_and_execution_time_bloomfilter(
415    n_samples_in_bloom_filter: usize,
416    n_samples_not_in_bloom_filter: usize,
417    n_bits_of_bloomfilter: usize,
418    n_hash_functions_in_bloomfilter: usize,
419    n_repititions: usize,
420) -> (f64, f64, f64, f64, f64) {
421    let mut total_false_positives_rate = 0.0;
422    let mut total_false_negatives_rate = 0.0;
423    let mut total_insertion_time = 0.0;
424    let mut total_contains_time = 0.0;
425    let mut expected_false_positive_rate = 0.0;
426
427    for _ in 0..n_repititions {
428        let (
429            false_positive_rate,
430            false_negative_rate,
431            _total_insertion_time,
432            _total_contains_time,
433            _expected_false_positive_rate,
434        ) = compute_mistakes_and_execution_time_bloomfilter(
435            n_samples_in_bloom_filter,
436            n_samples_not_in_bloom_filter,
437            n_bits_of_bloomfilter,
438            n_hash_functions_in_bloomfilter,
439        );
440        total_false_positives_rate += false_positive_rate;
441        total_false_negatives_rate += false_negative_rate;
442        total_insertion_time += _total_insertion_time as f64;
443        total_contains_time += _total_contains_time as f64;
444        expected_false_positive_rate = _expected_false_positive_rate;
445    }
446    let n_repititions = n_repititions as f64;
447    (
448        total_false_positives_rate / n_repititions,
449        total_false_negatives_rate / n_repititions,
450        total_insertion_time / n_repititions,
451        total_contains_time / n_repititions,
452        expected_false_positive_rate,
453    )
454}
455
456/// Compute the number of micro seconds in a `Duration`-object.
457fn duration_to_micro_seconds(duration: time::Duration) -> u64 {
458    let nanos = duration.subsec_nanos() as u64;
459    (1000 * 1000 * 1000 * duration.as_secs() + nanos) / (1000)
460}
461
462#[cfg(test)]
463pub mod tests {
464    mod test_utilities {
465
466        #[test]
467        fn test_duration_to_micro_seconds() {
468            use super::super::duration_to_micro_seconds;
469            use std::time;
470            assert_eq!(
471                duration_to_micro_seconds(time::Duration::from_micros(100.5 as u64)),
472                100.5 as u64
473            );
474        }
475
476        #[test]
477        /// Test `compute_false_positive_rate_of_bloomfilter` runs through with
478        /// reasonable input values.
479        fn test_compute_mistakes_and_execution_time_bloomfilter() {
480            use super::super::compute_false_positive_rate_of_bloomfilter;
481
482            _ = compute_false_positive_rate_of_bloomfilter(10, 10, 5);
483        }
484        #[test]
485        /// Test `repeate_compute_false_positive_rate_of_bloomfilter` runs through with
486        /// reasonable input values.
487        fn test_repeat_compute_mistakes_and_execution_time_bloomfilter() {
488            use super::super::repeat_compute_mistakes_and_execution_time_bloomfilter;
489
490            _ = repeat_compute_mistakes_and_execution_time_bloomfilter(10, 10, 5, 10, 5);
491        }
492        #[test]
493        fn test_get_random_seed() {
494            // Simply test the function runs through, not much more that can be test
495            // given it stochasticity.
496            use super::super::get_random_seeds;
497            let number_seeds_to_generate = 10;
498            let generated_random_seeds = get_random_seeds(number_seeds_to_generate);
499            assert_eq!(generated_random_seeds.len(), number_seeds_to_generate);
500        }
501        mod test_all_elements_bitmap_a_also_in_b {
502            use super::super::super::all_elements_bitmap_a_also_in_b;
503            #[test]
504            fn different_length() {
505                // Both bitmaps have different length, hence `None` is returned.
506                assert_eq!(
507                    all_elements_bitmap_a_also_in_b(&vec![0, 0, 0, 0], &vec![0, 0, 0]),
508                    None
509                )
510            }
511
512            #[test]
513            fn a_equals_b_both_empty() {
514                // Both bitmaps are entry, therefore all elements in b are also in a
515                assert_eq!(
516                    all_elements_bitmap_a_also_in_b(&vec![0, 0, 0, 0], &vec![0, 0, 0, 0]),
517                    Some(true)
518                )
519            }
520
521            #[test]
522            fn a_equals_b_both_full() {
523                // Both bitmaps are completely full, therefore all elements in b are also in a
524                assert_eq!(
525                    all_elements_bitmap_a_also_in_b(&vec![1, 1, 1, 1], &vec![1, 1, 1, 1]),
526                    Some(true)
527                )
528            }
529
530            #[test]
531            fn a_equals_b_mixed_entries() {
532                // Some elements in bitmap, but the same for a and b therefore all elements in b
533                // are also in a.
534                assert_eq!(
535                    all_elements_bitmap_a_also_in_b(&vec![1, 0, 0, 1], &vec![1, 0, 0, 1]),
536                    Some(true)
537                )
538            }
539
540            #[test]
541            fn a_more_entries_than_b() {
542                // A has more entries in its bitmap, therefore all elements in b are also in a.
543                assert_eq!(
544                    all_elements_bitmap_a_also_in_b(&vec![1, 1, 0, 1], &vec![1, 0, 0, 1]),
545                    Some(true)
546                )
547            }
548
549            #[test]
550            fn b_more_entries_than_a() {
551                // B has one element that, a does not have, therefore return Value is `Some(false)`
552                assert_eq!(
553                    all_elements_bitmap_a_also_in_b(&vec![1, 1, 0, 1], &vec![1, 0, 1, 1]),
554                    Some(false)
555                )
556            }
557        }
558        mod test_elem_to_position_in_bitmap {
559            use super::super::super::bytes_to_position_in_bitmap;
560            /// Small hash function that sums the integer in the bytes array and
561            /// adds the seed. This does not make sense from a hashing perspective,
562            /// and is only used in testing to make the results predictable.
563            fn hash_function_for_testing(bytes: &[u8], seed: u64) -> u64 {
564                (bytes.iter().map(|elem| *elem as u64).sum::<u64>() + seed).into()
565            }
566
567            #[test]
568            /// Test that the hash function just sums all entries and adds
569            /// the seed.
570            fn test_hash_function_for_testing() {
571                assert_eq!(hash_function_for_testing(&vec![1, 2, 3], 5), 11);
572            }
573            #[test]
574            /// Take adding seed to the entries in the vec gives you 3, which given a bit map of size three
575            /// should result in entry 3.
576            fn array_2_entries_sum_3() {
577                assert_eq!(
578                    bytes_to_position_in_bitmap(1, &vec![1, 1], 4, hash_function_for_testing),
579                    3
580                );
581            }
582            #[test]
583            /// Take adding seed to the entries in the vec gives you 3, which given a bit map of size three
584            fn array_3_entries_sum_3() {
585                assert_eq!(
586                    bytes_to_position_in_bitmap(3, &vec![1, 4, 9], 4, hash_function_for_testing),
587                    1
588                );
589            }
590            #[test]
591            /// Add a string as bytes.
592            fn test_with_string_to_bytes() {
593                bytes_to_position_in_bitmap(3, "testing".as_bytes(), 4, hash_function_for_testing);
594            }
595            #[test]
596            /// Add an integer as bytes.
597            fn test_with_integer_as_bytes() {
598                bytes_to_position_in_bitmap(3, &1_u32.to_ne_bytes(), 4, hash_function_for_testing);
599            }
600        }
601        mod test_constructors {
602            use super::super::super::Bloomfilter;
603
604            #[test]
605            /// Test `Bloomfilter::random`-constructor.
606            fn test_random() {
607                let bloom_filter = Bloomfilter::new(10, 20);
608                assert_eq!(bloom_filter.bitmap.len(), 10);
609                assert_eq!(bloom_filter.seeds_for_hash_function.len(), 20);
610            }
611            #[test]
612            /// Test `Bloomfilter::new_with_seeds`-constructor.
613            fn test_new_with_seeds() {
614                let seeds: Vec<u64> = vec![1, 2, 3];
615                let bloom_filter = Bloomfilter::new_with_seeds(10, seeds.clone());
616                assert_eq!(bloom_filter.bitmap.len(), 10);
617                assert_eq!(bloom_filter.seeds_for_hash_function, seeds);
618            }
619            #[test]
620            /// Test `Bloomfilter::new_with_seeds_and_custom_hash_function`-constructor.
621            fn test_new_with_seeds_and_custom_hash_function() {
622                let seeds: Vec<u64> = vec![1, 2, 3];
623                let hash_function = |_object_as_bytes: &[u8], _seed: u64| -> u64 { 2 };
624                let bloom_filter = Bloomfilter::new_with_seeds_and_custom_hash_function(
625                    10,
626                    seeds.clone(),
627                    hash_function,
628                );
629                assert_eq!(bloom_filter.bitmap.len(), 10);
630                assert_eq!(bloom_filter.seeds_for_hash_function, seeds);
631                // Cannot compare closures, therefore call it and check it returns 2.
632                assert_eq!((bloom_filter.hash_function)(&[1, 0, 1,], 4), 2);
633            }
634        }
635        mod test_getter_functions {
636            use super::super::super::Bloomfilter;
637            #[test]
638            fn bloom_filter_with_5_bits() {
639                let bloom_filter = Bloomfilter::new(5, 10);
640                assert_eq!(bloom_filter.number_of_bits(), 5);
641            }
642        }
643
644        mod test_insertion_and_retrivial {
645            use super::super::super::Bloomfilter;
646
647            fn sum_up_bitmap(bitmap: &[u8]) -> u64 {
648                bitmap.iter().map(|indicator| *indicator as u64).sum()
649            }
650            #[test]
651            fn test_insert_string() {
652                let mut bloomfilter = Bloomfilter::new(100, 10);
653                bloomfilter.insert("This is in there");
654
655                // After the `insert`-function was called with 10 hash functions, the sum of the bitmap
656                // has to be between 1..=10. (1 = 10 collisions, e.g. all hash functions mapped to the
657                // same entry, 10 meaning all hash_function mapped this string to a different index
658                // in the bitmap.)
659                let sum_of_entries: u64 = sum_up_bitmap(&bloomfilter.bitmap);
660                assert!((sum_of_entries > 0) & (sum_of_entries <= 10));
661
662                bloomfilter.insert("This is also in here!");
663
664                // After the second insertion there may be between 1 and 20 entries.
665                let sum_of_entries: u64 = sum_up_bitmap(&bloomfilter.bitmap);
666                assert!((sum_of_entries > 0) & (sum_of_entries <= 20));
667            }
668            #[test]
669            fn test_insert_bytes() {
670                let mut bloomfilter = Bloomfilter::new(100, 10);
671                bloomfilter.insert_object_as_bytes(&1_u32.to_ne_bytes());
672
673                // After the `insert_object_as_bytes`-function was called with 10 hash functions, the sum of the bitmap
674                // has to be between 1..=10. (1 = 10 collisions, e.g. all hash functions mapped to the
675                // same entry, 10 meaning all hash_function mapped this string to a different index
676                // in the bitmap.)
677                let sum_of_entries: u64 = sum_up_bitmap(&bloomfilter.bitmap);
678                assert!((sum_of_entries > 0) & (sum_of_entries <= 10));
679
680                bloomfilter.insert_object_as_bytes("This is also in here!".as_bytes());
681
682                // After the second insertion there may be between 1 and 20 entries.
683                let sum_of_entries: u64 = sum_up_bitmap(&bloomfilter.bitmap);
684                assert!((sum_of_entries > 0) & (sum_of_entries <= 20));
685            }
686            #[test]
687            /// Mix the two interfaces `insert`  and `insert_object_as_bytes` and test that the same
688            /// properties still hold for the populated bitmap.
689            fn test_insert_bytes_and_string() {
690                let mut bloomfilter = Bloomfilter::new(100, 10);
691                bloomfilter.insert_object_as_bytes(&1_u32.to_ne_bytes());
692
693                // After the `insert_object_as_bytes`-function was called with 10 hash functions, the sum of the bitmap
694                // has to be between 1..=10. (1 = 10 collisions, e.g. all hash functions mapped to the
695                // same entry, 10 meaning all hash_function mapped this string to a different index
696                // in the bitmap.)
697                let sum_of_entries: u64 = sum_up_bitmap(&bloomfilter.bitmap);
698                assert!((sum_of_entries > 0) & (sum_of_entries <= 10));
699
700                bloomfilter.insert("This is also in here!");
701
702                // After the second insertion there may be between 1 and 20 entries.
703                let sum_of_entries: u64 = sum_up_bitmap(&bloomfilter.bitmap);
704                assert!((sum_of_entries > 0) & (sum_of_entries <= 20));
705            }
706            #[test]
707            fn test_contains() {
708                let mut bloomfilter = Bloomfilter::new(100, 10);
709                bloomfilter.insert("1");
710
711                // Only with the single entry we can guarantee that the bloomfilter does not have
712                // collision, e.g. two different objects get the same representation in the bitmap.
713                assert!(bloomfilter.contains("1"));
714                // Also test that a different object is not in here.
715                assert!(!bloomfilter.contains("2"));
716            }
717            #[test]
718            fn does_not_contain() {
719                let mut bloomfilter = Bloomfilter::new(100, 10);
720                for number_as_string in vec!["1", "2", "3", "4", "5", "6", "7", "8", "9"] {
721                    bloomfilter.insert(number_as_string);
722                }
723                assert!(!bloomfilter.contains("10"));
724            }
725            #[test]
726            fn test_contains_bytes() {
727                let mut bloomfilter = Bloomfilter::new(100, 10);
728                bloomfilter.insert_object_as_bytes("1".as_bytes());
729
730                // Only with the single entry we can guarantee that the bloomfilter does not have
731                // collision, e.g. two different objects get the same representation in the bitmap.
732                assert!(bloomfilter.contains_object_as_bytes("1".as_bytes()));
733                // Also test that a different object is not in here.
734                assert!(!bloomfilter.contains_object_as_bytes("2".as_bytes()));
735            }
736            #[test]
737            fn does_not_contain_bytes() {
738                let mut bloomfilter = Bloomfilter::new(100, 10);
739                for number_as_string in vec!["1", "2", "3", "4", "5", "6", "7", "8", "9"] {
740                    bloomfilter.insert_object_as_bytes(number_as_string.as_bytes());
741                }
742                assert!(!bloomfilter.contains_object_as_bytes("10".as_bytes()));
743            }
744        }
745        mod test_bloomfilter_from_specification {
746            use super::super::super::super::random_sequence;
747            use super::super::super::Bloomfilter;
748            use std::collections;
749
750            #[test]
751            /// Test that `from_specification` with reasonable values
752            /// achieves roughly the expected quality.
753            fn test_from_specification() {
754                let expected_false_positive_rate = 0.1;
755                let number_of_entries_in_bloom_filter = 1000;
756                let number_of_entries_not_in_bloom_filter = 2000;
757                let margin_of_error = 0.2;
758
759                let mut bloom_filter_to_test = Bloomfilter::from_specification(
760                    number_of_entries_in_bloom_filter,
761                    expected_false_positive_rate,
762                );
763
764                // Compute and insert 1000 sequences.
765                let mut sequences_in_bloom_filter: collections::HashSet<Vec<i32>> =
766                    collections::HashSet::with_capacity(number_of_entries_in_bloom_filter);
767                while sequences_in_bloom_filter.len() < number_of_entries_in_bloom_filter {
768                    sequences_in_bloom_filter
769                        .insert(random_sequence::random_sequence_from_range(0..100, 20));
770                }
771                for sequence in &sequences_in_bloom_filter {
772                    bloom_filter_to_test.insert(format!("{:?}", sequence).as_ref())
773                }
774
775                // Test false positives rate.
776                let mut false_positives = 0.0;
777                for _ in 0..number_of_entries_not_in_bloom_filter {
778                    let new_sequence = random_sequence::random_sequence_from_range(0..100, 20);
779                    if !sequences_in_bloom_filter.contains(&new_sequence) {
780                        if bloom_filter_to_test.contains(format!("{:?}", new_sequence).as_ref()) {
781                            false_positives += 1.0;
782                        }
783                    }
784                }
785                let actual_false_positive_rate =
786                    false_positives / number_of_entries_not_in_bloom_filter as f64;
787                println!("{}", actual_false_positive_rate);
788                assert!(
789                    (actual_false_positive_rate * (1.0 + margin_of_error)
790                        > expected_false_positive_rate)
791                        & (actual_false_positive_rate * (1.0 - margin_of_error)
792                            < expected_false_positive_rate)
793                )
794            }
795        }
796    }
797}