1use std::hash::{Hash, Hasher};
2use std::io;
3use std::path::Path;
4
5use murmurhash3::murmurhash3_x64_128;
6
7use crate::bitvec::BitVector;
8use crate::mmap_bitvec::MmapBitVec;
9
10#[derive(Default)]
14pub struct MurmurHasher(u64, u64);
15
16impl MurmurHasher {
17 #[allow(missing_docs)]
18 pub fn new() -> Self {
19 MurmurHasher(0, 0)
20 }
21 #[allow(missing_docs)]
22 pub fn values(&self) -> (u64, u64) {
23 (self.0, self.1)
24 }
25}
26
27impl Hasher for MurmurHasher {
28 #[inline]
29 fn write(&mut self, bytes: &[u8]) {
30 let hash = murmurhash3_x64_128(bytes, self.0);
31 *self = MurmurHasher(hash.0, hash.1);
32 }
33
34 fn finish(&self) -> u64 {
35 self.0
36 }
37}
38
39pub struct BloomFilter {
41 bit_vec: MmapBitVec,
42 hashes: u8,
43}
44
45impl BloomFilter {
46 pub fn new<P>(filename: Option<P>, bits: usize, n_hashes: u8) -> Result<Self, io::Error>
51 where
52 P: AsRef<Path>,
53 {
54 let header = vec![];
55 let bitvec = match filename {
56 Some(filename) => {
57 if Path::exists(filename.as_ref()) {
58 MmapBitVec::open(&filename, Some(b"!!"), false)?
59 } else {
60 MmapBitVec::create(&filename, bits, Some(*b"!!"), &header)?
61 }
62 }
63 None => MmapBitVec::from_memory(bits)?,
64 };
65 Ok(BloomFilter {
66 bit_vec: bitvec,
67 hashes: n_hashes,
68 })
69 }
70
71 pub fn insert<H>(&mut self, item: H)
73 where
74 H: Hash,
75 {
76 let size = self.bit_vec.size();
77 let mut hasher = MurmurHasher::new();
78 for _ in 0..self.hashes {
79 item.hash(&mut hasher);
80 let hash: u64 = hasher.finish();
81 self.bit_vec.set(hash as usize % size, true);
82 }
83 }
84
85 pub fn contains<H>(&self, item: H) -> bool
87 where
88 H: Hash,
89 {
90 let size = self.bit_vec.size();
91 let mut hasher = MurmurHasher::new();
92 for _ in 0..self.hashes {
93 item.hash(&mut hasher);
94 let hash: u64 = hasher.finish();
95 if !self.bit_vec.get(hash as usize % size) {
96 return false;
97 }
98 }
99 true
100 }
101}
102
103#[cfg(test)]
104mod test {
105 use super::BloomFilter;
106
107 #[test]
108 fn test_bloom_filter() {
109 use std::fs::remove_file;
110 let mut filter = BloomFilter::new(Some("./test_bloom"), 100, 2).unwrap();
111 let (a, b) = (1, 2);
112 assert!(!filter.contains(a));
113 assert!(!filter.contains(b));
114 filter.insert(b);
115 assert!(!filter.contains(a));
116 assert!(filter.contains(b));
117
118 remove_file("./test_bloom").unwrap();
119 }
120}