rusty-structures 0.1.0

An experimental data structures and algorithms library.
Documentation
use crate::{RustyResult};

const BITS_PER_SEGMENT: usize = 8;

#[derive(Debug)]
pub struct BloomFilter {
    pub data: Vec<u8>,
    size: usize,
    seed: u32,
    number_of_bits: f64,
    num_of_hash_functions: f64
}

impl BloomFilter {
    
    pub fn new(max_size: usize, max_tolerance: Option<f32>, seed: Option<u32>) -> RustyResult<BloomFilter> {
        let max_tolerance = evalexpr::eval(&format!("math::ln({})", max_tolerance.unwrap_or(0.01)))?.as_float()?;
        let ln2 = evalexpr::eval("math::ln(2)")?.as_float()?;
        let calc = -(max_size as f64 * max_tolerance / ln2 / ln2).ceil();
        let num_of_hash_functions = -(max_tolerance / ln2).ceil();
        let num_of_elements = (calc / BITS_PER_SEGMENT as f64).ceil() as usize;
        Ok(
        BloomFilter { 
            data: vec![0; num_of_elements],
            seed: seed.unwrap_or(rand::random()),
            size: 0,
            number_of_bits: calc,
            num_of_hash_functions
        })
    } 

    pub fn contains(&self, key: &[u8], positions: Option<&[u128]>) -> bool {
        let tmp_new;
        let positions = match positions {
            Some(value) => value,
            None => {
                tmp_new = self.key_2_positions(key);
                &tmp_new
            }
        };
        for position in positions {
            if !self.read_bit(*position as usize) {
                return false;
            }
        }
        return true;
    }

    pub fn insert(&mut self, key: &[u8]) {
        let positions = self.key_2_positions(key);
        if !self.contains(key, Some(&positions)) {
            for position in positions {
                self.write_bit(position as usize);
                self.size += 1;
            }
        }
    }

    fn read_bit(&self, index: usize) -> bool {
        let (element, bit) = self.find_bit_coordinates(index);
        if let Some(value) = self.data.get(element) {
            let result = (*value & (1 << bit)) >> bit;
            return result == 1;
        }
        false
    }

    fn false_positive_probability(&self) -> f64 {
        (1.0 - std::f64::consts::E.powf(self.num_of_hash_functions * self.size as f64 / self.number_of_bits)).powf(self.num_of_hash_functions)
    }

    fn write_bit(&mut self, index: usize) {
        let (element, bit) = self.find_bit_coordinates(index);
        if let Some(data) = self.data.get_mut(element) {
            *data = *data | (1_u8 << bit);
        }
    }

    fn find_bit_coordinates(&self, index: usize) -> (usize, usize) {
        let byte_index = index / BITS_PER_SEGMENT;
        let bit_offset = index % BITS_PER_SEGMENT;

        (byte_index, bit_offset)
    }

    fn key_2_positions(&self, key: &[u8]) -> Vec<u128> {
        let murmur_result = fastmurmur3::murmur3_x64_128(key, self.seed) ;
        let fnv1_result = const_fnv1a_hash::fnv1a_hash_128(key, None);

        (0..(self.num_of_hash_functions as u128))
        .map(|x| (murmur_result.wrapping_add(x.wrapping_mul(fnv1_result)).wrapping_add(x.wrapping_mul(x))) % BITS_PER_SEGMENT as u128)
        .collect()
    }
}