use crate::{BlockedBitVec, BloomFilter, BuildHasher, DefaultHasher};
use std::hash::Hash;
use crate::signature;
/// Intermediate builder that carries the bit storage and the hasher for a
/// [`BloomFilter`] until one of the finishing methods (`hashes`,
/// `expected_items`, `items`) consumes it.
///
/// `BLOCK_SIZE_BITS` defaults to 512 and the hasher type `S` defaults to
/// [`DefaultHasher`].
#[derive(Clone, Debug)]
pub struct Builder<const BLOCK_SIZE_BITS: usize = 512, S = DefaultHasher> {
    /// Bit storage that is handed to the finished filter as its `bits` field.
    pub(crate) data: BlockedBitVec<BLOCK_SIZE_BITS>,
    /// Hasher that is handed to the finished filter unchanged.
    pub(crate) hasher: S,
}
impl<const BLOCK_SIZE_BITS: usize> Builder<BLOCK_SIZE_BITS> {
    /// Replaces the builder's hasher with a [`DefaultHasher`] created by
    /// `DefaultHasher::seeded` from the big-endian bytes of `seed`.
    ///
    /// Only available while the builder still uses the default hasher type.
    /// The bit storage is carried over untouched.
    pub fn seed(self, seed: &u128) -> Self {
        let seed_bytes = seed.to_be_bytes();
        Self {
            hasher: DefaultHasher::seeded(&seed_bytes),
            ..self
        }
    }
}
impl<const BLOCK_SIZE_BITS: usize, S: BuildHasher> Builder<BLOCK_SIZE_BITS, S> {
    /// Swaps the builder's hasher for `hasher`, keeping the bit storage.
    ///
    /// The returned builder is parameterized over the new hasher type `H`;
    /// the previous hasher is discarded.
    pub fn hasher<H: BuildHasher>(self, hasher: H) -> Builder<BLOCK_SIZE_BITS, H> {
        let Self { data, .. } = self;
        Builder { data, hasher }
    }

    /// Finishes the builder into a [`BloomFilter`] using `num_hashes` as the
    /// requested total number of hashes.
    pub fn hashes(self, num_hashes: u32) -> BloomFilter<BLOCK_SIZE_BITS, S> {
        // Every `u32` is exactly representable as an `f64`.
        self.hashes_f(f64::from(num_hashes))
    }

    /// Finishes the builder from a (possibly fractional) total hash count.
    ///
    /// `signature::optimize_hashing` splits the total into the
    /// `(num_hashes, num_rounds)` pair stored on the filter, while the total
    /// itself is recorded, cast to `u64`, as `target_hashes`.
    fn hashes_f(self, total_num_hashes: f64) -> BloomFilter<BLOCK_SIZE_BITS, S> {
        let Self { data: bits, hasher } = self;
        let (num_hashes, num_rounds) =
            signature::optimize_hashing(total_num_hashes, BLOCK_SIZE_BITS);
        let target_hashes = total_num_hashes as u64;
        BloomFilter {
            bits,
            target_hashes,
            num_hashes,
            num_rounds,
            hasher,
        }
    }

    /// Finishes the builder into a [`BloomFilter`] whose hash count is derived
    /// from the number of items expected to be inserted.
    ///
    /// The expected item count is averaged over the storage's blocks and the
    /// result is passed to `BloomFilter::optimal_hashes_f` to pick the hash
    /// count.
    pub fn expected_items(self, expected_num_items: usize) -> BloomFilter<BLOCK_SIZE_BITS, S> {
        let block_count = self.data.num_blocks() as f64;
        let average_load = expected_num_items as f64 / block_count;
        let chosen_hashes = BloomFilter::<BLOCK_SIZE_BITS>::optimal_hashes_f(average_load);
        self.hashes_f(chosen_hashes)
    }

    /// Finishes the builder into a [`BloomFilter`] sized for `items` and then
    /// feeds every item to it via `extend`.
    ///
    /// The iterator must report an exact length, which is used as the
    /// expected item count.
    pub fn items<I: IntoIterator<IntoIter = impl ExactSizeIterator<Item = impl Hash>>>(
        self,
        items: I,
    ) -> BloomFilter<BLOCK_SIZE_BITS, S> {
        let entries = items.into_iter();
        let expected = entries.len();
        let mut filter = self.expected_items(expected);
        filter.extend(entries);
        filter
    }
}
#[cfg(test)]
mod tests {
    use crate::BloomFilter;
    use ahash::RandomState;

    /// For each block size, the slice length times 64 must equal the number
    /// of bits requested from `builder_from_bits`.
    #[test]
    fn data_size() {
        let size_bits = 512 * 1000;

        // The block size is a const generic argument, so the repeated check
        // is stamped out once per literal by a local macro.
        macro_rules! assert_data_size {
            ($($block_bits:literal),+ $(,)?) => {$(
                let bloom = BloomFilter::<$block_bits>::builder_from_bits(size_bits).hashes(4);
                assert_eq!(bloom.as_slice().len() * 64, size_bits);
            )+};
        }

        assert_data_size!(512, 256, 128, 64);
    }

    /// Smoke test: a builder accepts a custom hasher and can then be finished.
    #[test]
    fn api() {
        let builder = BloomFilter::<64>::builder_from_bits(10);
        let _bloom = builder.hasher(RandomState::default()).hashes(4);
    }

    /// The hash count passed to `hashes` is the one reported by `num_hashes`.
    #[test]
    fn specified_hashes() {
        (1..1000).for_each(|requested| {
            let filter = BloomFilter::<128>::builder_from_bits(1).hashes(requested);
            assert_eq!(requested, filter.num_hashes());
        });
    }
}