1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
use std::hash::{Hash, Hasher};
use std::io;
use std::path::Path;
use murmurhash3::murmurhash3_x64_128;
use crate::bitvec::BitVector;
use crate::mmap_bitvec::MmapBitVec;
#[derive(Default)]
pub struct MurmurHasher(u64, u64);
impl MurmurHasher {
#[allow(missing_docs)]
pub fn new() -> Self {
MurmurHasher(0, 0)
}
#[allow(missing_docs)]
pub fn values(&self) -> (u64, u64) {
(self.0, self.1)
}
}
impl Hasher for MurmurHasher {
#[inline]
fn write(&mut self, bytes: &[u8]) {
let hash = murmurhash3_x64_128(bytes, self.0);
*self = MurmurHasher(hash.0, hash.1);
}
fn finish(&self) -> u64 {
self.0
}
}
pub struct BloomFilter {
bit_vec: MmapBitVec,
hashes: u8,
}
impl BloomFilter {
pub fn new<P>(filename: Option<P>, bits: usize, n_hashes: u8) -> Result<Self, io::Error>
where
P: AsRef<Path>,
{
let header = vec![];
let bitvec = match filename {
Some(filename) => {
if Path::exists(filename.as_ref()) {
MmapBitVec::open(&filename, Some(b"!!"), false)?
} else {
MmapBitVec::create(&filename, bits, Some(*b"!!"), &header)?
}
}
None => MmapBitVec::from_memory(bits)?,
};
Ok(BloomFilter {
bit_vec: bitvec,
hashes: n_hashes,
})
}
pub fn insert<H>(&mut self, item: H)
where
H: Hash,
{
let size = self.bit_vec.size();
let mut hasher = MurmurHasher::new();
for _ in 0..self.hashes {
item.hash(&mut hasher);
let hash: u64 = hasher.finish();
self.bit_vec.set(hash as usize % size, true);
}
}
pub fn contains<H>(&self, item: H) -> bool
where
H: Hash,
{
let size = self.bit_vec.size();
let mut hasher = MurmurHasher::new();
for _ in 0..self.hashes {
item.hash(&mut hasher);
let hash: u64 = hasher.finish();
if !self.bit_vec.get(hash as usize % size) {
return false;
}
}
true
}
}
#[cfg(test)]
mod test {
use super::BloomFilter;
#[test]
fn test_bloom_filter() {
use std::fs::remove_file;
let mut filter = BloomFilter::new(Some("./test_bloom"), 100, 2).unwrap();
let (a, b) = (1, 2);
assert!(!filter.contains(a));
assert!(!filter.contains(b));
filter.insert(b);
assert!(!filter.contains(a));
assert!(filter.contains(b));
remove_file("./test_bloom").unwrap();
}
}