memory_cache_rust/bloom/
bbloom.rs1use serde::{Deserialize, Serialize};
13
14const MASK: [u8; 8] = [1, 2, 4, 8, 16, 32, 64, 128];
15
16pub struct Bloom {
17 bitset: Vec<i64>,
18 elem_num: u64,
19 size_exp: u64,
20 size: u64,
21 set_locs: u64,
22 shift: u64,
23}
24
25fn calc_size_by_wrong_positives(num_entries: f64, wrongs: f64) -> (u64, u64) {
26
27 let size = -1.0 * num_entries * wrongs.ln() / 0.69314718056_f64.powf(2.0);
28 let locs = (0.69314718056_f64 * size / num_entries).ceil() ;
29 return (size as u64, locs as u64);
30}
31
32impl Bloom {
33 pub fn new(num_entries: f64, wrongs: f64) -> Self {
35 let mut entries = 0;
36 let mut locs = 0;
37 if wrongs < 1.0 {
38 let (e, l) = calc_size_by_wrong_positives(num_entries, wrongs);
39 entries = e;
40 locs = l;
41 } else {
42 entries = num_entries as u64;
43 locs = wrongs as u64;
44 }
45
46 let (size, exponent) = getSize(entries);
47 let mut b = Bloom {
48 bitset: vec![],
49 elem_num: 0,
50 size_exp: exponent,
51 size: size - 1,
52 set_locs: locs,
53 shift: 64 - exponent,
54 };
55 b.size(size);
56 b
57 }
58 pub fn add(&mut self, hash: u64) {
71 let h = hash >> self.shift;
72 let l = hash << self.shift >> self.shift;
73
74 for i in 0..self.set_locs {
75 self.set((h + (i * l)) & self.size);
76 self.elem_num += 1;
77 };
78 }
79 pub fn add_if_not_has(&mut self, hash: u64) -> bool {
83 if self.has(hash) {
84 return false;
85 }
86 self.add(hash);
87 true
88 }
89 pub fn clear(&mut self) {
91 self.bitset = vec![0; self.bitset.len()]
92 }
93 pub fn set(&mut self, idx: u64) {
95 let mut ptr: *mut i64 = self.bitset.as_mut_ptr();
99 unsafe {
100 let step = idx >> 6;ptr = ptr.wrapping_offset(step as isize);
102
103 *ptr |= MASK[(idx % 8) as usize] as i64;
104 };
105
106 }
107 pub fn size(&mut self, sz: u64) {
109 self.bitset = Vec::with_capacity((sz >> 6) as usize); for i in 0..(sz >> 6) as usize {
111 self.bitset.insert(i, 0)
112 }
113 }
114 pub fn has(&mut self, hash: u64) -> bool {
117 let h = hash >> self.shift;
118 let l = hash << self.shift >> self.shift;
119 for i in 0..self.set_locs {
120 if !self.isset((h + (i * l)) & self.size) {
121 return false;
122 }
123 }
124
125 true
126 }
127 pub fn isset(&mut self, idx: u64) -> bool {
129 let mut ptr: *mut i64 = self.bitset.as_mut_ptr();
130 unsafe {
134 let step = idx >> 6 ;
135 ptr = ptr.wrapping_offset(step as isize);
136 }
137
138 let r = unsafe { (*ptr >> (idx % 8)) & 1 };
139 r == 1
140 }
141 fn json_encoder(&mut self) -> Vec<u8> {
146 let mut bj = BloomJsonExport {
147 set_locs: self.set_locs,
148 filter_set: vec![0u8; (self.bitset.len() << 3) as usize],
149 };
150
151 for i in 0..bj.filter_set.len() {
152 let ptr: *mut i64 = self.bitset.as_mut_ptr();
153 bj.filter_set[i] = unsafe { ptr.wrapping_offset(i as isize) as u8 }
154 }
155 let data = serde_json::to_vec(&bj);
156 if let Ok(result) = data {
157 return result;
158 }
159 vec![]
160 }
161}
162
163#[derive(Serialize, Deserialize, Debug)]
164pub struct BloomJsonExport {
165 filter_set: Vec<u8>,
166 set_locs: u64,
167}
168
169
170fn getSize(mut u_i64: u64) -> (u64, u64) {
171 if u_i64 < 512 {
172 u_i64 = 512;
173 }
174 let mut exponent = 0;
175 let mut size = 1;
176 while size < u_i64 {
177 size <<= 1;
178 exponent += 1;
179 }
180 return (size, exponent);
181}
182
183
184#[cfg(test)]
185mod tests {
186 use std::collections::HashSet;
187
188 use uuid::Uuid;
189
190 use crate::bloom::rutil::mem_hash;
191
192 use super::*;
193
194 const N: usize = 1 << 16;
195
196
197 fn worldlist() -> Vec<Vec<u8>> {
198 let _seed = [0u8; 16];
199 let mut wordlist = Vec::with_capacity(N);
202 for _i in 0..wordlist.capacity() {
203 let uuid = Uuid::new_v4();
205 wordlist.push(Vec::from(uuid.as_bytes().map(|v| v)));
209 }
210 wordlist
211 }
212
213 #[test]
214 fn test_number_of_wrong() {
215 let mut bf = Bloom::new((N * 10) as f64, 7.0);
216 let mut cnt = 0;
217 let word_list = worldlist();
218 let mut set = HashSet::new();
219 for i in 0..word_list.len() {
221 let hash = mem_hash(&*word_list[i]);
222 set.insert(hash);
223 if !bf.add_if_not_has(hash.into()) {
224 cnt += 1;
225 }
226 }
227
228 assert_eq!(set.len(), word_list.len());
229
230 println!("Bloomfilter New(7* 2**16, 7) \
231 (-> size={} bit): \n \
232 Check for 'false positives': {}\
233 wrong positive 'Has' results on 2**16 entries => {} %%\n",
234 bf.bitset.len() << 6, cnt, (cnt) as f64 / (N) as f64)
235 }
236
237 #[test]
238 fn test_has() {
239 let mut bf = Bloom::new((N * 10) as f64, 7.0);
240
241 let v = bf.has(18272025040905874063);
242 assert_eq!(v, false);
243
244 let _v = bf.has(18272025040905874063);
245 bf.add_if_not_has(18272025040905874063);
246 let v = bf.has(18272025040905874063);
247 assert_eq!(v, true)
248 }
249
250 #[test]
251 fn oprator_test() {
252 let a = 1; let b = 2;
256 assert_eq!(a & b, 0);
257 assert_eq!(a | b, 3);
258 assert_eq!(a ^ b, 3);
259 assert_eq!(a << 4, 16);
260 assert_eq!(a >> b, 0);
261 assert_eq!(31 >> 4, 1);
262 assert_eq!(31 << 2, 124);
263 assert_eq!(31 >> 3, 3);
264 }
265}