rusty_leveldb/
filter.rs

1use std::rc::Rc;
2
3use integer_encoding::FixedInt;
4
5/// Encapsulates a filter algorithm allowing to search for keys more efficiently.
6/// Usually, policies are used as a BoxedFilterPolicy (see below), so they
7/// can be easily cloned and nested.
8pub trait FilterPolicy {
9    /// Returns a string identifying this policy.
10    fn name(&self) -> &'static str;
11    /// Create a filter matching the given keys. Keys are given as a long byte array that is
12    /// indexed by the offsets contained in key_offsets.
13    fn create_filter(&self, keys: &[u8], key_offsets: &[usize]) -> Vec<u8>;
14    /// Check whether the given key may match the filter.
15    fn key_may_match(&self, key: &[u8], filter: &[u8]) -> bool;
16}
17
18/// A boxed and refcounted filter policy (reference-counted because a Box with unsized content
19/// couldn't be cloned otherwise)
20pub type BoxedFilterPolicy = Rc<Box<dyn FilterPolicy>>;
21
22impl FilterPolicy for BoxedFilterPolicy {
23    fn name(&self) -> &'static str {
24        (**self).name()
25    }
26    fn create_filter(&self, keys: &[u8], key_offsets: &[usize]) -> Vec<u8> {
27        (**self).create_filter(keys, key_offsets)
28    }
29    fn key_may_match(&self, key: &[u8], filter: &[u8]) -> bool {
30        (**self).key_may_match(key, filter)
31    }
32}
33
34/// Used for tables that don't have filter blocks but need a type parameter.
35#[derive(Clone)]
36pub struct NoFilterPolicy;
37
38impl NoFilterPolicy {
39    pub fn new() -> NoFilterPolicy {
40        NoFilterPolicy
41    }
42}
43
44impl FilterPolicy for NoFilterPolicy {
45    fn name(&self) -> &'static str {
46        "_"
47    }
48    fn create_filter(&self, _: &[u8], _: &[usize]) -> Vec<u8> {
49        vec![]
50    }
51    fn key_may_match(&self, _: &[u8], _: &[u8]) -> bool {
52        true
53    }
54}
55
56const BLOOM_SEED: u32 = 0xbc9f1d34;
57
58/// A filter policy using a bloom filter internally.
59#[derive(Clone)]
60pub struct BloomPolicy {
61    bits_per_key: u32,
62    k: u32,
63}
64
65/// Beware the magic numbers...
66impl BloomPolicy {
67    /// Returns a new boxed BloomPolicy.
68    pub fn new(bits_per_key: u32) -> BloomPolicy {
69        BloomPolicy::new_unwrapped(bits_per_key)
70    }
71
72    /// Returns a new BloomPolicy with the given parameter.
73    fn new_unwrapped(bits_per_key: u32) -> BloomPolicy {
74        let mut k = (bits_per_key as f32 * 0.69) as u32;
75
76        k = k.clamp(1, 30);
77
78        BloomPolicy { bits_per_key, k }
79    }
80
81    fn bloom_hash(&self, data: &[u8]) -> u32 {
82        let m: u32 = 0xc6a4a793;
83        let r: u32 = 24;
84
85        let mut ix = 0;
86        let limit = data.len();
87
88        let mut h: u32 = BLOOM_SEED ^ (limit as u64 * m as u64) as u32;
89
90        while ix + 4 <= limit {
91            let w = u32::decode_fixed(&data[ix..ix + 4]);
92            ix += 4;
93
94            h = (h as u64 + w as u64) as u32;
95            h = (h as u64 * m as u64) as u32;
96            h ^= h >> 16;
97        }
98
99        // Process left-over bytes
100        assert!(limit - ix < 4);
101
102        if limit - ix > 0 {
103            for (i, b) in data[ix..].iter().enumerate() {
104                h = h.overflowing_add((*b as u32) << (8 * i)).0;
105            }
106
107            h = (h as u64 * m as u64) as u32;
108            h ^= h >> r;
109        }
110        h
111    }
112}
113
114impl FilterPolicy for BloomPolicy {
115    fn name(&self) -> &'static str {
116        "leveldb.BuiltinBloomFilter2"
117    }
118    fn create_filter(&self, keys: &[u8], key_offsets: &[usize]) -> Vec<u8> {
119        let filter_bits = key_offsets.len() * self.bits_per_key as usize;
120        let mut filter: Vec<u8>;
121
122        if filter_bits < 64 {
123            filter = Vec::with_capacity(8 + 1);
124            filter.resize(8, 0);
125        } else {
126            filter = Vec::with_capacity(1 + filter_bits.div_ceil(8));
127            filter.resize(filter_bits.div_ceil(8), 0);
128        }
129
130        let adj_filter_bits = (filter.len() * 8) as u32;
131
132        // Encode k at the end of the filter.
133        filter.push(self.k as u8);
134
135        // Add all keys to the filter.
136        offset_data_iterate(keys, key_offsets, |key| {
137            let mut h = self.bloom_hash(key);
138            let delta = h.rotate_left(15);
139            for _ in 0..self.k {
140                let bitpos = (h % adj_filter_bits) as usize;
141                filter[bitpos / 8] |= 1 << (bitpos % 8);
142                h = (h as u64 + delta as u64) as u32;
143            }
144        });
145
146        filter
147    }
148    fn key_may_match(&self, key: &[u8], filter: &[u8]) -> bool {
149        if filter.is_empty() {
150            return true;
151        }
152
153        let bits = (filter.len() - 1) as u32 * 8;
154        let k = filter[filter.len() - 1];
155        let filter_adj = &filter[0..filter.len() - 1];
156
157        if k > 30 {
158            return true;
159        }
160
161        let mut h = self.bloom_hash(key);
162        let delta = h.rotate_left(15);
163        for _ in 0..k {
164            let bitpos = (h % bits) as usize;
165            if (filter_adj[bitpos / 8] & (1 << (bitpos % 8))) == 0 {
166                return false;
167            }
168            h = (h as u64 + delta as u64) as u32;
169        }
170        true
171    }
172}
173
174/// A filter policy wrapping another policy; extracting the user key from internal keys for all
175/// operations.
176/// A User Key is u8*.
177/// An Internal Key is u8* u8{8} (where the second part encodes a tag and a sequence number).
178#[derive(Clone)]
179pub struct InternalFilterPolicy<FP: FilterPolicy> {
180    internal: FP,
181}
182
183impl<FP: FilterPolicy> InternalFilterPolicy<FP> {
184    pub fn new(inner: FP) -> InternalFilterPolicy<FP> {
185        InternalFilterPolicy { internal: inner }
186    }
187}
188
189impl<FP: FilterPolicy> FilterPolicy for InternalFilterPolicy<FP> {
190    fn name(&self) -> &'static str {
191        self.internal.name()
192    }
193
194    fn create_filter(&self, keys: &[u8], key_offsets: &[usize]) -> Vec<u8> {
195        let mut mod_keys = Vec::with_capacity(keys.len() - key_offsets.len() * 8);
196        let mut mod_key_offsets = Vec::with_capacity(key_offsets.len());
197
198        offset_data_iterate(keys, key_offsets, |key| {
199            mod_key_offsets.push(mod_keys.len());
200            mod_keys.extend_from_slice(&key[0..key.len() - 8]);
201        });
202        self.internal.create_filter(&mod_keys, &mod_key_offsets)
203    }
204
205    fn key_may_match(&self, key: &[u8], filter: &[u8]) -> bool {
206        self.internal.key_may_match(&key[0..key.len() - 8], filter)
207    }
208}
209
210/// offset_data_iterate iterates over the entries in data that are indexed by the offsets given in
211/// offsets. This is e.g. the internal format of a FilterBlock.
212fn offset_data_iterate<F: FnMut(&[u8])>(data: &[u8], offsets: &[usize], mut f: F) {
213    for offix in 0..offsets.len() {
214        let upper = if offix == offsets.len() - 1 {
215            data.len()
216        } else {
217            offsets[offix + 1]
218        };
219        let piece = &data[offsets[offix]..upper];
220        f(piece);
221    }
222}
223
224#[cfg(test)]
225mod tests {
226    use super::*;
227    use crate::key_types::LookupKey;
228
229    const _BITS_PER_KEY: u32 = 12;
230
231    fn input_data() -> (Vec<u8>, Vec<usize>) {
232        let mut concat = vec![];
233        let mut offs = vec![];
234
235        for d in [
236            "abc123def456".as_bytes(),
237            "xxx111xxx222".as_bytes(),
238            "ab00cd00ab".as_bytes(),
239            "908070605040302010".as_bytes(),
240        ]
241        .iter()
242        {
243            offs.push(concat.len());
244            concat.extend_from_slice(d);
245        }
246        (concat, offs)
247    }
248
249    /// Creates a filter using the keys from input_data().
250    fn create_filter() -> Vec<u8> {
251        let fpol = BloomPolicy::new(_BITS_PER_KEY);
252        let (data, offs) = input_data();
253        let filter = fpol.create_filter(&data, &offs);
254
255        assert_eq!(filter, vec![194, 148, 129, 140, 192, 196, 132, 164, 8]);
256        filter
257    }
258
259    /// Creates a filter using the keys from input_data() but converted to InternalKey format.
260    fn create_internalkey_filter() -> Vec<u8> {
261        let fpol = Rc::new(Box::new(InternalFilterPolicy::new(BloomPolicy::new(
262            _BITS_PER_KEY,
263        ))));
264        let (data, offs) = input_data();
265        let (mut intdata, mut intoffs) = (vec![], vec![]);
266
267        offset_data_iterate(&data, &offs, |key| {
268            let ikey = LookupKey::new(key, 123);
269            intoffs.push(intdata.len());
270            intdata.extend_from_slice(ikey.internal_key());
271        });
272
273        fpol.create_filter(&intdata, &intoffs)
274    }
275
276    #[test]
277    fn test_filter_bloom() {
278        let f = create_filter();
279        let fp = BloomPolicy::new(_BITS_PER_KEY);
280        let (data, offs) = input_data();
281
282        offset_data_iterate(&data, &offs, |key| {
283            assert!(fp.key_may_match(key, &f));
284        });
285    }
286
287    /// This test verifies that InternalFilterPolicy works correctly.
288    #[test]
289    fn test_filter_internal_keys_identical() {
290        assert_eq!(create_filter(), create_internalkey_filter());
291    }
292
293    #[test]
294    fn test_filter_bloom_hash() {
295        let d1 = vec![0x62];
296        let d2 = vec![0xc3, 0x97];
297        let d3 = vec![0xe2, 0x99, 0xa5];
298        let d4 = vec![0xe1, 0x80, 0xb9, 0x32];
299
300        let fp = BloomPolicy::new_unwrapped(_BITS_PER_KEY);
301
302        assert_eq!(fp.bloom_hash(&d1), 0xef1345c4);
303        assert_eq!(fp.bloom_hash(&d2), 0x5b663814);
304        assert_eq!(fp.bloom_hash(&d3), 0x323c078f);
305        assert_eq!(fp.bloom_hash(&d4), 0xed21633a);
306    }
307}