stable_bloom_filter/
buckets.rs

1/// Buckets is a fast, space-efficient array of buckets where each bucket can
2/// store up to a configured maximum value.
3pub struct Buckets {
4    data: Vec<u8>,
5    bucket_size: u8,
6    max: u8,
7    count: usize,
8}
9
10impl Buckets {
11    /// Creates a new Buckets with the provided number of buckets where
12    /// each bucket is the specified number of bits.
13    pub fn new(count: usize, bucket_size: u8) -> Self {
14        if bucket_size > 8 {
15            panic!("max bucket_size is 8");
16        }
17        Buckets {
18            count,
19            bucket_size,
20            data: vec![0; (count * usize::from(bucket_size) + 7) / 8],
21            max: ((1u16 << u16::from(bucket_size)) - 1) as u8,
22        }
23    }
24
25    /// Returns the maximum value that can be stored in a bucket.
26    pub fn max_bucket_value(&self) -> u8 {
27        self.max
28    }
29
30    /// Returns the number of buckets.
31    pub fn count(&self) -> usize {
32        self.count
33    }
34
35    /// Decrease the value in the specified bucket by the provided delta.
36    /// The value is clamped to zero and the maximum bucket value.
37    /// Returns itself to allow for chaining.
38    pub fn decrease(&mut self, bucket: usize, delta: u8) -> &Self {
39        let val = (self.get_bits(bucket * usize::from(self.bucket_size), self.bucket_size) as u8)
40            .saturating_sub(delta);
41
42        self.set_bits(
43            (bucket as u32) * u32::from(self.bucket_size),
44            self.bucket_size,
45            val,
46        );
47        self
48    }
49
50    /// Increment the value in the specified bucket by the provided delta.
51    /// The value is clamped to zero and the maximum bucket value.
52    /// Returns itself to allow for chaining.
53    pub fn increment(&mut self, bucket: usize, delta: u8) -> &Self {
54        let val = (self.get_bits(bucket * usize::from(self.bucket_size), self.bucket_size) as u8)
55            .saturating_add(delta)
56            .min(self.max);
57
58        self.set_bits(
59            (bucket as u32) * u32::from(self.bucket_size),
60            self.bucket_size,
61            val,
62        );
63        self
64    }
65
66    /// Set the bucket value. The value is clamped to zero and the maximum
67    /// bucket value. Returns itself to allow for chaining.
68    pub fn set(&mut self, bucket: usize, value: u8) -> &Self {
69        let value = value.min(self.max);
70
71        self.set_bits(
72            (bucket as u32) * u32::from(self.bucket_size),
73            self.bucket_size,
74            value,
75        );
76        self
77    }
78
79    /// Returns the value in the specified bucket.
80    pub fn get(&self, bucket: usize) -> u8 {
81        self.get_bits(bucket * usize::from(self.bucket_size), self.bucket_size) as u8
82    }
83
84    /// Reset restores the Buckets to the original state.
85    /// Returns itself to allow for chaining.
86    pub fn reset(&mut self) -> &Self {
87        self.data = vec![0; (self.count * usize::from(self.bucket_size) + 7) / 8];
88        self
89    }
90
91    /// Returns the bits at the specified offset and length.
92    fn get_bits(&self, offset: usize, length: u8) -> u32 {
93        let byte_index = offset / 8;
94        let byte_offset = offset % 8;
95        if byte_offset as u8 + length > 8 {
96            let rem = 8 - byte_offset as u8;
97            return self.get_bits(offset, rem)
98                | (self.get_bits(offset + rem as usize, length - rem) << rem);
99        }
100
101        let bit_mask = (1 << length) - 1;
102        (u32::from(self.data[byte_index as usize]) & (bit_mask << byte_offset) as u32)
103            >> byte_offset
104    }
105
106    /// setBits sets bits at the specified offset and length.
107    fn set_bits(&mut self, offset: u32, length: u8, bits: u8) {
108        let byte_index = offset / 8;
109        let byte_offset = offset % 8;
110        if byte_offset as u8 + length > 8 {
111            let rem = 8 - byte_offset as u8;
112            self.set_bits(offset, rem, bits);
113            self.set_bits(offset + u32::from(rem), length - rem, bits >> rem);
114            return;
115        }
116
117        let bit_mask: u32 = (1 << length) - 1;
118        self.data[byte_index as usize] =
119            (u32::from(self.data[byte_index as usize]) & !(bit_mask << byte_offset)) as u8;
120        self.data[byte_index as usize] = (u32::from(self.data[byte_index as usize])
121            | ((u32::from(bits) & bit_mask) << byte_offset))
122            as u8;
123    }
124}
125
126#[cfg(test)]
127mod tests {
128    use super::Buckets;
129
130    // Ensures that MaxBucketValue returns the correct maximum based on the bucket
131    // size.
132    #[test]
133    fn test_max_bucket_value() {
134        let b = Buckets::new(10, 2);
135        assert_eq!(b.max_bucket_value(), 3);
136    }
137
138    // Ensures that Count returns the number of buckets.
139    #[test]
140    fn test_buckets_count() {
141        let b = Buckets::new(10, 2);
142        assert_eq!(b.count(), 10);
143    }
144
145    // Ensures that Increment increments the bucket value by the correct delta and
146    // clamps to zero and the maximum, Get returns the correct bucket value, and
147    // Set sets the bucket value correctly.
148    #[test]
149    fn test_buckets_increment_decrease_and_get_and_set() {
150        // bucket_size = 2
151        let mut b = Buckets::new(5, 2);
152
153        let _b = b.increment(0, 1);
154        assert_eq!(b.get(0), 1);
155
156        let _b = b.decrease(1, 1);
157        assert_eq!(b.get(1), 0);
158
159        let _b = b.set(2, 100);
160        assert_eq!(b.get(2), 3);
161
162        let _b = b.increment(3, 2);
163        assert_eq!(b.get(3), 2);
164        // bucket_size = 3
165        let mut b = Buckets::new(5, 3);
166
167        let _b = b.increment(0, 1);
168        assert_eq!(b.get(0), 1);
169
170        let _b = b.decrease(1, 1);
171        assert_eq!(b.get(1), 0);
172
173        let _b = b.set(2, 100);
174        assert_eq!(b.get(2), 7);
175
176        let _b = b.increment(3, 2);
177        assert_eq!(b.get(3), 2);
178        // bucket_size = 8
179        let mut b = Buckets::new(5, 8);
180
181        let _b = b.increment(0, 1);
182        assert_eq!(b.get(0), 1);
183
184        let _b = b.decrease(1, 1);
185        assert_eq!(b.get(1), 0);
186
187        let _b = b.set(2, 255);
188        assert_eq!(b.get(2), 255);
189
190        let _b = b.increment(3, 2);
191        assert_eq!(b.get(3), 2);
192    }
193
194    // Ensures that Reset restores the Buckets to the original state.
195    #[test]
196    fn test_buckets_reset() {
197        let mut b = Buckets::new(5, 2);
198
199        for i in 0..5 {
200            b.increment(i, 1);
201        }
202
203        let _b = b.reset();
204
205        for i in 0..5 {
206            assert_eq!(b.get(i), 0);
207        }
208    }
209}