fastbloom_rs/
vec.rs

1use core::slice;
2use std::{fs::File, io::{Read, Seek}};
3
4use crate::builder::SUFFIX;
5
6#[inline(always)]
7fn get_usize_len() -> usize {
8    if cfg!(target_pointer_width = "64") { 64 } else if cfg!(target_pointer_width = "32") { 32 } else { panic!() }
9}
10
11/// bitmap only for bloom filter.
12#[derive(Debug)]
13#[derive(Clone)]
14#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
15pub(crate) struct BloomBitVec {
16    /// Internal representation of the bit vector
17    pub(crate) storage: Vec<usize>,
18    /// The number of valid bits in the internal representation
19    pub(crate) nbits: u64,
20}
21
22impl BloomBitVec {
23    pub fn new(slots: usize) -> Self {
24        BloomBitVec {
25            storage: vec![0; slots],
26            nbits: (slots * get_usize_len()) as u64,
27        }
28    }
29
30    pub fn from_elem(slots: usize, bit: bool) -> Self {
31        BloomBitVec {
32            storage: vec![if bit { !0 } else { 0 }; slots],
33            nbits: (slots * get_usize_len()) as u64,
34        }
35    }
36    
37    pub fn from_file(file: &mut File, seek: u64, bytes_len: u64) -> Self {
38        #[cfg(target_pointer_width = "64")]
39            let length = bytes_len / 8;
40        #[cfg(target_pointer_width = "32")]
41            let length = bytes_len / 4;
42        
43        let nbits = bytes_len * 8;
44
45        let mut storage = vec![0usize; length.try_into().unwrap()];
46        let ptr = storage.as_mut_ptr();
47        let buf = ptr as *mut u8;
48        let buf = unsafe {
49            slice::from_raw_parts_mut(buf, bytes_len.try_into().unwrap())
50        };
51
52        file.seek(std::io::SeekFrom::Start(seek)).unwrap();
53        file.read_exact(buf).unwrap();
54
55        BloomBitVec {
56            storage,
57            nbits: nbits.try_into().unwrap()
58        }
59    }
60
61    #[inline]
62    pub fn set(&mut self, index: usize) {
63        #[cfg(target_pointer_width = "64")]
64            let w = index >> 6;
65        #[cfg(target_pointer_width = "32")]
66            let w = index >> 5;
67        let b = index & SUFFIX;
68        let flag = 1usize << b;
69        self.storage[w] = self.storage[w] | flag;
70    }
71
72    #[inline]
73    pub fn get(&self, index: usize) -> bool {
74        #[cfg(target_pointer_width = "64")]
75            let w = index >> 6;
76        #[cfg(target_pointer_width = "32")]
77            let w = index >> 5;
78        let b = index & SUFFIX;
79        let flag = 1usize << b;
80        (self.storage[w] & flag) != 0
81    }
82
83    pub fn or(&mut self, other: &BloomBitVec) {
84        for (m, o) in self.storage.iter_mut().zip(&other.storage) {
85            *m |= *o;
86        }
87    }
88
89    pub fn xor(&mut self, other: &BloomBitVec) {
90        for (m, o) in self.storage.iter_mut().zip(&other.storage) {
91            *m ^= *o;
92        }
93    }
94
95    pub fn nor(&mut self, other: &Self) {
96        for (m, o) in self.storage.iter_mut().zip(&other.storage) {
97            *m = !(*m | *o);
98        }
99    }
100
101    pub fn xnor(&mut self, other: &Self) {
102        for (m, o) in self.storage.iter_mut().zip(&other.storage) {
103            *m = !(*m ^ *o);
104        }
105    }
106
107    pub fn and(&mut self, other: &BloomBitVec) {
108        for (m, o) in self.storage.iter_mut().zip(&other.storage) {
109            *m &= *o;
110        }
111    }
112
113    pub fn nand(&mut self, other: &Self) {
114        for (m, o) in self.storage.iter_mut().zip(&other.storage) {
115            *m = !(*m & *o);
116        }
117    }
118
119    pub fn difference(&mut self, other: &Self) {
120        for (m, o) in self.storage.iter_mut().zip(&other.storage) {
121            *m &= !*o;
122        }
123    }
124
125    pub fn count_zeros(&self)->u32 {
126        self.storage.iter().fold(0, |acc, x| acc + x.count_zeros())
127    }
128
129    pub fn clear(&mut self) {
130        self.storage.fill(0);
131    }
132
133    pub fn is_empty(&self) -> bool {
134        self.storage.is_empty()
135    }
136}
137
138/// counter vector for counting bloom filter.
139#[derive(Debug)]
140#[derive(Clone)]
141pub(crate) struct CountingVec {
142    /// Internal representation of the vector
143    pub(crate) storage: Vec<usize>,
144    /// The number of valid counter in the internal representation
145    pub(crate) counters: u64,
146    /// The number of valid counter in a slot which mean usize.
147    pub(crate) counter_per_slot: usize,
148}
149
150impl CountingVec {
151    /// create a CountingVec
152    pub fn new(slots: usize) -> Self {
153        let counter_per_slot = get_usize_len() >> 2;
154        CountingVec {
155            storage: vec![0; slots],
156            counters: (slots * counter_per_slot) as u64,
157            counter_per_slot,
158        }
159    }
160
161    #[inline]
162    pub fn increment(&mut self, index: usize) {
163        let current = self.get(index);
164        #[cfg(target_pointer_width = "64")]
165        if current != 0b1111 {
166            let current = current + 1;
167            let w = index >> 4;
168            let b = index & 0b1111;
169            let move_bits = (15 - b) * 4;
170            self.storage[w] =
171                (self.storage[w] & !(0b1111 << move_bits)) | (current << move_bits)
172        }
173
174        #[cfg(target_pointer_width = "32")]
175        if current != 0b111 {
176            let current = current + 1;
177            let w = index >> 3;
178            let b = index & 0b111;
179            let move_bits = (7 - b) * 4;
180            self.storage[w] =
181                (self.storage[w] & !(0b1111 << move_bits)) | (current << move_bits)
182        }
183    }
184
185    #[inline]
186    pub fn decrement(&mut self, index: usize) {
187        let current = self.get(index);
188        if current > 0 {
189            if cfg!(target_pointer_width="64") {
190                let current = current - 1;
191                let w = index >> 4;
192                let b = index & 0b1111;
193                let move_bits = (15 - b) * 4;
194                self.storage[w] =
195                    (self.storage[w] & !(0b1111 << move_bits)) | (current << move_bits)
196            } else if cfg!(target_pointer_width="32") {
197                let current = current - 1;
198                let w = index >> 3;
199                let b = index & 0b111;
200                let move_bits = (7 - b) * 4;
201                self.storage[w] =
202                    (self.storage[w] & !(0b1111 << move_bits)) | (current << move_bits)
203            }
204        }
205    }
206
207    #[inline]
208    pub fn get(&self, index: usize) -> usize {
209        #[cfg(target_pointer_width = "64")]
210            let w = index >> 4;
211        #[cfg(target_pointer_width = "64")]
212            let b = index & 0b1111;
213        #[cfg(target_pointer_width = "32")]
214            let w = index >> 3;
215        #[cfg(target_pointer_width = "32")]
216            let b = index & 0b111;
217        let slot = self.storage[w];
218        #[cfg(target_pointer_width = "64")]
219        return (slot >> ((15 - b) * 4)) & 0b1111;
220        #[cfg(target_pointer_width = "32")]
221        return (slot >> ((7 - b) * 4)) & 0b111;
222    }
223
224    pub fn clear(&mut self) {
225        self.storage.fill(0);
226    }
227}
228
229#[test]
230fn test_vec() {
231    let mut vec = BloomBitVec::new(16);
232    vec.set(37);
233    vec.set(38);
234    println!("{:?}", vec);
235    assert_eq!(vec.get(37), true);
236    assert_eq!(vec.get(38), true);
237}
238
239#[test]
240fn test_size() {
241    println!("{}", get_usize_len());
242    #[cfg(target_pointer_width = "64")]
243    assert_eq!(get_usize_len(), 64);
244    #[cfg(target_pointer_width = "32")]
245    assert_eq!(get_usize_len(), 32);
246}
247
248#[test]
249fn test_count_vec() {
250    let mut vec = CountingVec::new(10);
251    vec.increment(7);
252
253    assert_eq!(1, vec.get(7))
254}
255
256#[test]
257fn test_count_zeros() {
258    let mut vec = BloomBitVec::new(4);
259    vec.set(37);
260    vec.set(30);
261    vec.set(38);
262    println!("{:?}", vec);
263    #[cfg(target_pointer_width = "64")]
264    assert_eq!(vec.count_zeros(), 253);
265    #[cfg(target_pointer_width = "32")]
266    assert_eq!(vec.count_zeros(), 125);
267}