rusty_structures/
bloom_filter.rs1use crate::{RustyResult};
2
3const BITS_PER_SEGMENT: usize = 8;
4
5#[derive(Debug)]
6pub struct BloomFilter {
7 pub data: Vec<u8>,
8 size: usize,
9 seed: u32,
10 number_of_bits: f64,
11 num_of_hash_functions: f64
12}
13
14impl BloomFilter {
15
16 pub fn new(max_size: usize, max_tolerance: Option<f32>, seed: Option<u32>) -> RustyResult<BloomFilter> {
17 let max_tolerance = evalexpr::eval(&format!("math::ln({})", max_tolerance.unwrap_or(0.01)))?.as_float()?;
18 let ln2 = evalexpr::eval("math::ln(2)")?.as_float()?;
19 let calc = -(max_size as f64 * max_tolerance / ln2 / ln2).ceil();
20 let num_of_hash_functions = -(max_tolerance / ln2).ceil();
21 let num_of_elements = (calc / BITS_PER_SEGMENT as f64).ceil() as usize;
22 Ok(
23 BloomFilter {
24 data: vec![0; num_of_elements],
25 seed: seed.unwrap_or(rand::random()),
26 size: 0,
27 number_of_bits: calc,
28 num_of_hash_functions
29 })
30 }
31
32 pub fn contains(&self, key: &[u8], positions: Option<&[u128]>) -> bool {
33 let tmp_new;
34 let positions = match positions {
35 Some(value) => value,
36 None => {
37 tmp_new = self.key_2_positions(key);
38 &tmp_new
39 }
40 };
41 for position in positions {
42 if !self.read_bit(*position as usize) {
43 return false;
44 }
45 }
46 return true;
47 }
48
49 pub fn insert(&mut self, key: &[u8]) {
50 let positions = self.key_2_positions(key);
51 if !self.contains(key, Some(&positions)) {
52 for position in positions {
53 self.write_bit(position as usize);
54 self.size += 1;
55 }
56 }
57 }
58
59 fn read_bit(&self, index: usize) -> bool {
60 let (element, bit) = self.find_bit_coordinates(index);
61 if let Some(value) = self.data.get(element) {
62 let result = (*value & (1 << bit)) >> bit;
63 return result == 1;
64 }
65 false
66 }
67
68 fn false_positive_probability(&self) -> f64 {
69 (1.0 - std::f64::consts::E.powf(self.num_of_hash_functions * self.size as f64 / self.number_of_bits)).powf(self.num_of_hash_functions)
70 }
71
72 fn write_bit(&mut self, index: usize) {
73 let (element, bit) = self.find_bit_coordinates(index);
74 if let Some(data) = self.data.get_mut(element) {
75 *data = *data | (1_u8 << bit);
76 }
77 }
78
79 fn find_bit_coordinates(&self, index: usize) -> (usize, usize) {
80 let byte_index = index / BITS_PER_SEGMENT;
81 let bit_offset = index % BITS_PER_SEGMENT;
82
83 (byte_index, bit_offset)
84 }
85
86 fn key_2_positions(&self, key: &[u8]) -> Vec<u128> {
87 let murmur_result = fastmurmur3::murmur3_x64_128(key, self.seed) ;
88 let fnv1_result = const_fnv1a_hash::fnv1a_hash_128(key, None);
89
90 (0..(self.num_of_hash_functions as u128))
91 .map(|x| (murmur_result.wrapping_add(x.wrapping_mul(fnv1_result)).wrapping_add(x.wrapping_mul(x))) % BITS_PER_SEGMENT as u128)
92 .collect()
93 }
94}