use serde::{Deserialize, Serialize};
const MASK: [u8; 8] = [1, 2, 4, 8, 16, 32, 64, 128];
pub struct Bloom {
bitset: Vec<i64>,
elem_num: u64,
size_exp: u64,
size: u64,
set_locs: u64,
shift: u64,
}
fn calc_size_by_wrong_positives(num_entries: f64, wrongs: f64) -> (u64, u64) {
let size = -1.0 * num_entries * wrongs.ln() / 0.69314718056_f64.powf(2.0);
let locs = (0.69314718056_f64 * size / num_entries).ceil() ;
return (size as u64, locs as u64);
}
impl Bloom {
pub fn new(num_entries: f64, wrongs: f64) -> Self {
let mut entries = 0;
let mut locs = 0;
if wrongs < 1.0 {
let (e, l) = calc_size_by_wrong_positives(num_entries, wrongs);
entries = e;
locs = l;
} else {
entries = num_entries as u64;
locs = wrongs as u64;
}
let (size, exponent) = getSize(entries);
let mut b = Bloom {
bitset: vec![],
elem_num: 0,
size_exp: exponent,
size: size - 1,
set_locs: locs,
shift: 64 - exponent,
};
b.size(size);
b
}
pub fn add(&mut self, hash: u64) {
let h = hash >> self.shift;
let l = hash << self.shift >> self.shift;
for i in 0..self.set_locs {
self.set((h + (i * l)) & self.size);
self.elem_num += 1;
};
}
pub fn add_if_not_has(&mut self, hash: u64) -> bool {
if self.has(hash) {
return false;
}
self.add(hash);
true
}
pub fn clear(&mut self) {
self.bitset = vec![0; self.bitset.len()]
}
pub fn set(&mut self, idx: u64) {
let mut ptr: *mut i64 = self.bitset.as_mut_ptr();
unsafe {
let step = idx >> 6; ptr = ptr.wrapping_offset(step as isize);
*ptr |= MASK[(idx % 8) as usize] as i64;
};
}
pub fn size(&mut self, sz: u64) {
self.bitset = Vec::with_capacity((sz >> 6) as usize); for i in 0..(sz >> 6) as usize {
self.bitset.insert(i, 0)
}
}
pub fn has(&mut self, hash: u64) -> bool {
let h = hash >> self.shift;
let l = hash << self.shift >> self.shift;
for i in 0..self.set_locs {
if !self.isset((h + (i * l)) & self.size) {
return false;
}
}
true
}
pub fn isset(&mut self, idx: u64) -> bool {
let mut ptr: *mut i64 = self.bitset.as_mut_ptr();
unsafe {
let step = idx >> 6 ;
ptr = ptr.wrapping_offset(step as isize);
}
let r = unsafe { (*ptr >> (idx % 8)) & 1 };
r == 1
}
fn json_encoder(&mut self) -> Vec<u8> {
let mut bj = BloomJsonExport {
set_locs: self.set_locs,
filter_set: vec![0u8; (self.bitset.len() << 3) as usize],
};
for i in 0..bj.filter_set.len() {
let ptr: *mut i64 = self.bitset.as_mut_ptr();
bj.filter_set[i] = unsafe { ptr.wrapping_offset(i as isize) as u8 }
}
let data = serde_json::to_vec(&bj);
if let Ok(result) = data {
return result;
}
vec![]
}
}
#[derive(Serialize, Deserialize, Debug)]
pub struct BloomJsonExport {
filter_set: Vec<u8>,
set_locs: u64,
}
fn getSize(mut u_i64: u64) -> (u64, u64) {
if u_i64 < 512 {
u_i64 = 512;
}
let mut exponent = 0;
let mut size = 1;
while size < u_i64 {
size <<= 1;
exponent += 1;
}
return (size, exponent);
}
#[cfg(test)]
mod tests {
use std::collections::HashSet;
use uuid::Uuid;
use crate::bloom::rutil::mem_hash;
use super::*;
const N: usize = 1 << 16;
fn worldlist() -> Vec<Vec<u8>> {
let _seed = [0u8; 16];
let mut wordlist = Vec::with_capacity(N);
for _i in 0..wordlist.capacity() {
let uuid = Uuid::new_v4();
wordlist.push(Vec::from(uuid.as_bytes().map(|v| v)));
}
wordlist
}
#[test]
fn test_number_of_wrong() {
let mut bf = Bloom::new((N * 10) as f64, 7.0);
let mut cnt = 0;
let word_list = worldlist();
let mut set = HashSet::new();
for i in 0..word_list.len() {
let hash = mem_hash(&*word_list[i]);
set.insert(hash);
if !bf.add_if_not_has(hash.into()) {
cnt += 1;
}
}
assert_eq!(set.len(), word_list.len());
println!("Bloomfilter New(7* 2**16, 7) \
(-> size={} bit): \n \
Check for 'false positives': {}\
wrong positive 'Has' results on 2**16 entries => {} %%\n",
bf.bitset.len() << 6, cnt, (cnt) as f64 / (N) as f64)
}
#[test]
fn test_has() {
let mut bf = Bloom::new((N * 10) as f64, 7.0);
let v = bf.has(18272025040905874063);
assert_eq!(v, false);
let _v = bf.has(18272025040905874063);
bf.add_if_not_has(18272025040905874063);
let v = bf.has(18272025040905874063);
assert_eq!(v, true)
}
#[test]
fn oprator_test() {
let a = 1; let b = 2;
assert_eq!(a & b, 0);
assert_eq!(a | b, 3);
assert_eq!(a ^ b, 3);
assert_eq!(a << 4, 16);
assert_eq!(a >> b, 0);
assert_eq!(31 >> 4, 1);
assert_eq!(31 << 2, 124);
assert_eq!(31 >> 3, 3);
}
}