use std::hash::{Hash, Hasher};
use std::io;
use std::path::Path;
use murmurhash3::murmurhash3_x64_128;
use crate::bitvec::BitVector;
use crate::mmap_bitvec::MmapBitVec;
#[derive(Default)]
pub struct MurmurHasher(u64, u64);
impl MurmurHasher {
#[allow(missing_docs)]
pub fn new() -> Self {
MurmurHasher(0, 0)
}
#[allow(missing_docs)]
pub fn values(&self) -> (u64, u64) {
(self.0, self.1)
}
}
impl Hasher for MurmurHasher {
#[inline]
fn write(&mut self, bytes: &[u8]) {
let hash = murmurhash3_x64_128(bytes, self.0);
*self = MurmurHasher(hash.0, hash.1);
}
fn finish(&self) -> u64 {
self.0
}
}
pub struct BloomFilter {
bit_vec: MmapBitVec,
hashes: u8,
}
impl BloomFilter {
pub fn new<P>(filename: Option<P>, bits: usize, n_hashes: u8) -> Result<Self, io::Error>
where
P: AsRef<Path>,
{
let header = vec![];
let bitvec = match filename {
Some(filename) => {
if Path::exists(filename.as_ref()) {
MmapBitVec::open(&filename, Some(b"!!"), false)?
} else {
MmapBitVec::create(&filename, bits, Some(*b"!!"), &header)?
}
}
None => MmapBitVec::from_memory(bits)?,
};
Ok(BloomFilter {
bit_vec: bitvec,
hashes: n_hashes,
})
}
pub fn insert<H>(&mut self, item: H)
where
H: Hash,
{
let size = self.bit_vec.size();
let mut hasher = MurmurHasher::new();
for _ in 0..self.hashes {
item.hash(&mut hasher);
let hash: u64 = hasher.finish();
self.bit_vec.set(hash as usize % size, true);
}
}
pub fn contains<H>(&self, item: H) -> bool
where
H: Hash,
{
let size = self.bit_vec.size();
let mut hasher = MurmurHasher::new();
for _ in 0..self.hashes {
item.hash(&mut hasher);
let hash: u64 = hasher.finish();
if !self.bit_vec.get(hash as usize % size) {
return false;
}
}
true
}
}
#[cfg(test)]
mod test {
use super::BloomFilter;
#[test]
fn test_bloom_filter() {
use std::fs::remove_file;
let mut filter = BloomFilter::new(Some("./test_bloom"), 100, 2).unwrap();
let (a, b) = (1, 2);
assert!(!filter.contains(a));
assert!(!filter.contains(b));
filter.insert(b);
assert!(!filter.contains(a));
assert!(filter.contains(b));
remove_file("./test_bloom").unwrap();
}
}