rusty_structures/
bloom_filter.rs

1use crate::{RustyResult};
2
3const BITS_PER_SEGMENT: usize = 8;
4
5#[derive(Debug)]
6pub struct BloomFilter {
7    pub data: Vec<u8>,
8    size: usize,
9    seed: u32,
10    number_of_bits: f64,
11    num_of_hash_functions: f64
12}
13
14impl BloomFilter {
15    
16    pub fn new(max_size: usize, max_tolerance: Option<f32>, seed: Option<u32>) -> RustyResult<BloomFilter> {
17        let max_tolerance = evalexpr::eval(&format!("math::ln({})", max_tolerance.unwrap_or(0.01)))?.as_float()?;
18        let ln2 = evalexpr::eval("math::ln(2)")?.as_float()?;
19        let calc = -(max_size as f64 * max_tolerance / ln2 / ln2).ceil();
20        let num_of_hash_functions = -(max_tolerance / ln2).ceil();
21        let num_of_elements = (calc / BITS_PER_SEGMENT as f64).ceil() as usize;
22        Ok(
23        BloomFilter { 
24            data: vec![0; num_of_elements],
25            seed: seed.unwrap_or(rand::random()),
26            size: 0,
27            number_of_bits: calc,
28            num_of_hash_functions
29        })
30    } 
31
32    pub fn contains(&self, key: &[u8], positions: Option<&[u128]>) -> bool {
33        let tmp_new;
34        let positions = match positions {
35            Some(value) => value,
36            None => {
37                tmp_new = self.key_2_positions(key);
38                &tmp_new
39            }
40        };
41        for position in positions {
42            if !self.read_bit(*position as usize) {
43                return false;
44            }
45        }
46        return true;
47    }
48
49    pub fn insert(&mut self, key: &[u8]) {
50        let positions = self.key_2_positions(key);
51        if !self.contains(key, Some(&positions)) {
52            for position in positions {
53                self.write_bit(position as usize);
54                self.size += 1;
55            }
56        }
57    }
58
59    fn read_bit(&self, index: usize) -> bool {
60        let (element, bit) = self.find_bit_coordinates(index);
61        if let Some(value) = self.data.get(element) {
62            let result = (*value & (1 << bit)) >> bit;
63            return result == 1;
64        }
65        false
66    }
67
68    fn false_positive_probability(&self) -> f64 {
69        (1.0 - std::f64::consts::E.powf(self.num_of_hash_functions * self.size as f64 / self.number_of_bits)).powf(self.num_of_hash_functions)
70    }
71
72    fn write_bit(&mut self, index: usize) {
73        let (element, bit) = self.find_bit_coordinates(index);
74        if let Some(data) = self.data.get_mut(element) {
75            *data = *data | (1_u8 << bit);
76        }
77    }
78
79    fn find_bit_coordinates(&self, index: usize) -> (usize, usize) {
80        let byte_index = index / BITS_PER_SEGMENT;
81        let bit_offset = index % BITS_PER_SEGMENT;
82
83        (byte_index, bit_offset)
84    }
85
86    fn key_2_positions(&self, key: &[u8]) -> Vec<u128> {
87        let murmur_result = fastmurmur3::murmur3_x64_128(key, self.seed) ;
88        let fnv1_result = const_fnv1a_hash::fnv1a_hash_128(key, None);
89
90        (0..(self.num_of_hash_functions as u128))
91        .map(|x| (murmur_result.wrapping_add(x.wrapping_mul(fnv1_result)).wrapping_add(x.wrapping_mul(x))) % BITS_PER_SEGMENT as u128)
92        .collect()
93    }
94}