use crate::bloom::{bitmap::CompressedBitmap, FilterSize, VecBitmap};
use std::collections::hash_map::RandomState;
use std::hash::{BuildHasher, Hash};
use std::marker::PhantomData;
pub trait Bitmap {
fn new_with_capacity(max_key: usize) -> Self;
fn set(&mut self, key: usize, value: bool);
fn get(&self, key: usize) -> bool;
fn byte_size(&self) -> usize;
fn or(&self, other: &Self) -> Self;
}
pub struct BloomFilterBuilder<H, B>
where
H: BuildHasher,
B: Bitmap,
{
hasher: H,
bitmap: B,
key_size: FilterSize,
}
impl std::default::Default for BloomFilterBuilder<RandomState, CompressedBitmap> {
fn default() -> BloomFilterBuilder<RandomState, CompressedBitmap> {
let size = FilterSize::KeyBytes2;
BloomFilterBuilder {
hasher: RandomState::default(),
bitmap: CompressedBitmap::new(key_size_to_bits(size)),
key_size: size,
}
}
}
impl<H, B> BloomFilterBuilder<H, B>
where
H: BuildHasher,
B: Bitmap,
{
pub fn with_bitmap_data(self, bitmap: B, key_size: FilterSize) -> Self {
let _ = bitmap.get(key_size as usize);
Self {
bitmap,
key_size,
..self
}
}
pub fn with_bitmap<U>(self) -> BloomFilterBuilder<H, U>
where
U: Bitmap,
{
BloomFilterBuilder {
hasher: self.hasher,
bitmap: U::new_with_capacity(key_size_to_bits(self.key_size)),
key_size: self.key_size,
}
}
pub fn build<T: Hash>(self) -> Bloom2<H, B, T> {
Bloom2 {
hasher: self.hasher,
bitmap: self.bitmap,
key_size: self.key_size,
_key_type: PhantomData,
}
}
pub fn size(self, size: FilterSize) -> Self {
Self {
key_size: size,
bitmap: B::new_with_capacity(key_size_to_bits(size)),
..self
}
}
}
impl<H> BloomFilterBuilder<H, CompressedBitmap>
where
H: BuildHasher,
{
pub fn hasher(hasher: H) -> Self {
let size = FilterSize::KeyBytes2;
Self {
hasher,
bitmap: CompressedBitmap::new(key_size_to_bits(size)),
key_size: size,
}
}
}
fn key_size_to_bits(k: FilterSize) -> usize {
2_usize.pow(8 * k as u32)
}
#[derive(Debug, Clone, PartialEq)]
pub struct Bloom2<H, B, T>
where
H: BuildHasher,
B: Bitmap,
{
hasher: H,
bitmap: B,
key_size: FilterSize,
_key_type: PhantomData<T>,
}
impl<T> std::default::Default for Bloom2<RandomState, CompressedBitmap, T>
where
T: Hash,
{
fn default() -> Self {
crate::bloom::BloomFilterBuilder::default().build()
}
}
impl<H, B, T> Bloom2<H, B, T>
where
H: BuildHasher,
B: Bitmap,
T: Hash,
{
pub fn insert(&mut self, data: &'_ T) {
self.hasher
.hash_one(data)
.to_be_bytes()
.chunks(self.key_size as usize)
.for_each(|chunk| self.bitmap.set(bytes_to_usize_key(chunk), true));
}
pub fn contains(&self, data: &'_ T) -> bool {
self.hasher
.hash_one(data)
.to_be_bytes()
.chunks(self.key_size as usize)
.any(|chunk| self.bitmap.get(bytes_to_usize_key(chunk)))
}
pub fn union(&mut self, other: &Self) {
assert_eq!(self.key_size, other.key_size);
self.bitmap = self.bitmap.or(&other.bitmap);
}
pub fn byte_size(&mut self) -> usize {
self.bitmap.byte_size()
}
pub fn bitmap(&self) -> &B {
&self.bitmap
}
}
impl<H, T> Bloom2<H, CompressedBitmap, T>
where
H: BuildHasher,
{
pub fn shrink_to_fit(&mut self) {
self.bitmap.shrink_to_fit();
}
}
impl<H, T> Bloom2<H, VecBitmap, T>
where
H: BuildHasher,
{
pub fn compress(self) -> Bloom2<H, CompressedBitmap, T> {
Bloom2::from(self)
}
}
fn bytes_to_usize_key<'a, I: IntoIterator<Item = &'a u8>>(bytes: I) -> usize {
bytes
.into_iter()
.fold(0, |key, &byte| (key << 8) | byte as usize)
}
impl<H, T> From<Bloom2<H, VecBitmap, T>> for Bloom2<H, CompressedBitmap, T>
where
H: BuildHasher,
{
fn from(v: Bloom2<H, VecBitmap, T>) -> Self {
Self {
hasher: v.hasher,
bitmap: CompressedBitmap::from(v.bitmap),
key_size: v.key_size,
_key_type: PhantomData,
}
}
}