1use std::rc::Rc;
2
3use integer_encoding::FixedInt;
4
5pub trait FilterPolicy {
9 fn name(&self) -> &'static str;
11 fn create_filter(&self, keys: &[u8], key_offsets: &[usize]) -> Vec<u8>;
14 fn key_may_match(&self, key: &[u8], filter: &[u8]) -> bool;
16}
17
18pub type BoxedFilterPolicy = Rc<Box<dyn FilterPolicy>>;
21
22impl FilterPolicy for BoxedFilterPolicy {
23 fn name(&self) -> &'static str {
24 (**self).name()
25 }
26 fn create_filter(&self, keys: &[u8], key_offsets: &[usize]) -> Vec<u8> {
27 (**self).create_filter(keys, key_offsets)
28 }
29 fn key_may_match(&self, key: &[u8], filter: &[u8]) -> bool {
30 (**self).key_may_match(key, filter)
31 }
32}
33
34#[derive(Clone)]
36pub struct NoFilterPolicy;
37
38impl NoFilterPolicy {
39 pub fn new() -> NoFilterPolicy {
40 NoFilterPolicy
41 }
42}
43
44impl FilterPolicy for NoFilterPolicy {
45 fn name(&self) -> &'static str {
46 "_"
47 }
48 fn create_filter(&self, _: &[u8], _: &[usize]) -> Vec<u8> {
49 vec![]
50 }
51 fn key_may_match(&self, _: &[u8], _: &[u8]) -> bool {
52 true
53 }
54}
55
56const BLOOM_SEED: u32 = 0xbc9f1d34;
57
58#[derive(Clone)]
60pub struct BloomPolicy {
61 bits_per_key: u32,
62 k: u32,
63}
64
65impl BloomPolicy {
67 pub fn new(bits_per_key: u32) -> BloomPolicy {
69 BloomPolicy::new_unwrapped(bits_per_key)
70 }
71
72 fn new_unwrapped(bits_per_key: u32) -> BloomPolicy {
74 let mut k = (bits_per_key as f32 * 0.69) as u32;
75
76 k = k.clamp(1, 30);
77
78 BloomPolicy { bits_per_key, k }
79 }
80
81 fn bloom_hash(&self, data: &[u8]) -> u32 {
82 let m: u32 = 0xc6a4a793;
83 let r: u32 = 24;
84
85 let mut ix = 0;
86 let limit = data.len();
87
88 let mut h: u32 = BLOOM_SEED ^ (limit as u64 * m as u64) as u32;
89
90 while ix + 4 <= limit {
91 let w = u32::decode_fixed(&data[ix..ix + 4]);
92 ix += 4;
93
94 h = (h as u64 + w as u64) as u32;
95 h = (h as u64 * m as u64) as u32;
96 h ^= h >> 16;
97 }
98
99 assert!(limit - ix < 4);
101
102 if limit - ix > 0 {
103 for (i, b) in data[ix..].iter().enumerate() {
104 h = h.overflowing_add((*b as u32) << (8 * i)).0;
105 }
106
107 h = (h as u64 * m as u64) as u32;
108 h ^= h >> r;
109 }
110 h
111 }
112}
113
114impl FilterPolicy for BloomPolicy {
115 fn name(&self) -> &'static str {
116 "leveldb.BuiltinBloomFilter2"
117 }
118 fn create_filter(&self, keys: &[u8], key_offsets: &[usize]) -> Vec<u8> {
119 let filter_bits = key_offsets.len() * self.bits_per_key as usize;
120 let mut filter: Vec<u8>;
121
122 if filter_bits < 64 {
123 filter = Vec::with_capacity(8 + 1);
124 filter.resize(8, 0);
125 } else {
126 filter = Vec::with_capacity(1 + filter_bits.div_ceil(8));
127 filter.resize(filter_bits.div_ceil(8), 0);
128 }
129
130 let adj_filter_bits = (filter.len() * 8) as u32;
131
132 filter.push(self.k as u8);
134
135 offset_data_iterate(keys, key_offsets, |key| {
137 let mut h = self.bloom_hash(key);
138 let delta = h.rotate_left(15);
139 for _ in 0..self.k {
140 let bitpos = (h % adj_filter_bits) as usize;
141 filter[bitpos / 8] |= 1 << (bitpos % 8);
142 h = (h as u64 + delta as u64) as u32;
143 }
144 });
145
146 filter
147 }
148 fn key_may_match(&self, key: &[u8], filter: &[u8]) -> bool {
149 if filter.is_empty() {
150 return true;
151 }
152
153 let bits = (filter.len() - 1) as u32 * 8;
154 let k = filter[filter.len() - 1];
155 let filter_adj = &filter[0..filter.len() - 1];
156
157 if k > 30 {
158 return true;
159 }
160
161 let mut h = self.bloom_hash(key);
162 let delta = h.rotate_left(15);
163 for _ in 0..k {
164 let bitpos = (h % bits) as usize;
165 if (filter_adj[bitpos / 8] & (1 << (bitpos % 8))) == 0 {
166 return false;
167 }
168 h = (h as u64 + delta as u64) as u32;
169 }
170 true
171 }
172}
173
174#[derive(Clone)]
179pub struct InternalFilterPolicy<FP: FilterPolicy> {
180 internal: FP,
181}
182
183impl<FP: FilterPolicy> InternalFilterPolicy<FP> {
184 pub fn new(inner: FP) -> InternalFilterPolicy<FP> {
185 InternalFilterPolicy { internal: inner }
186 }
187}
188
189impl<FP: FilterPolicy> FilterPolicy for InternalFilterPolicy<FP> {
190 fn name(&self) -> &'static str {
191 self.internal.name()
192 }
193
194 fn create_filter(&self, keys: &[u8], key_offsets: &[usize]) -> Vec<u8> {
195 let mut mod_keys = Vec::with_capacity(keys.len() - key_offsets.len() * 8);
196 let mut mod_key_offsets = Vec::with_capacity(key_offsets.len());
197
198 offset_data_iterate(keys, key_offsets, |key| {
199 mod_key_offsets.push(mod_keys.len());
200 mod_keys.extend_from_slice(&key[0..key.len() - 8]);
201 });
202 self.internal.create_filter(&mod_keys, &mod_key_offsets)
203 }
204
205 fn key_may_match(&self, key: &[u8], filter: &[u8]) -> bool {
206 self.internal.key_may_match(&key[0..key.len() - 8], filter)
207 }
208}
209
210fn offset_data_iterate<F: FnMut(&[u8])>(data: &[u8], offsets: &[usize], mut f: F) {
213 for offix in 0..offsets.len() {
214 let upper = if offix == offsets.len() - 1 {
215 data.len()
216 } else {
217 offsets[offix + 1]
218 };
219 let piece = &data[offsets[offix]..upper];
220 f(piece);
221 }
222}
223
224#[cfg(test)]
225mod tests {
226 use super::*;
227 use crate::key_types::LookupKey;
228
229 const _BITS_PER_KEY: u32 = 12;
230
231 fn input_data() -> (Vec<u8>, Vec<usize>) {
232 let mut concat = vec![];
233 let mut offs = vec![];
234
235 for d in [
236 "abc123def456".as_bytes(),
237 "xxx111xxx222".as_bytes(),
238 "ab00cd00ab".as_bytes(),
239 "908070605040302010".as_bytes(),
240 ]
241 .iter()
242 {
243 offs.push(concat.len());
244 concat.extend_from_slice(d);
245 }
246 (concat, offs)
247 }
248
249 fn create_filter() -> Vec<u8> {
251 let fpol = BloomPolicy::new(_BITS_PER_KEY);
252 let (data, offs) = input_data();
253 let filter = fpol.create_filter(&data, &offs);
254
255 assert_eq!(filter, vec![194, 148, 129, 140, 192, 196, 132, 164, 8]);
256 filter
257 }
258
259 fn create_internalkey_filter() -> Vec<u8> {
261 let fpol = Rc::new(Box::new(InternalFilterPolicy::new(BloomPolicy::new(
262 _BITS_PER_KEY,
263 ))));
264 let (data, offs) = input_data();
265 let (mut intdata, mut intoffs) = (vec![], vec![]);
266
267 offset_data_iterate(&data, &offs, |key| {
268 let ikey = LookupKey::new(key, 123);
269 intoffs.push(intdata.len());
270 intdata.extend_from_slice(ikey.internal_key());
271 });
272
273 fpol.create_filter(&intdata, &intoffs)
274 }
275
276 #[test]
277 fn test_filter_bloom() {
278 let f = create_filter();
279 let fp = BloomPolicy::new(_BITS_PER_KEY);
280 let (data, offs) = input_data();
281
282 offset_data_iterate(&data, &offs, |key| {
283 assert!(fp.key_may_match(key, &f));
284 });
285 }
286
287 #[test]
289 fn test_filter_internal_keys_identical() {
290 assert_eq!(create_filter(), create_internalkey_filter());
291 }
292
293 #[test]
294 fn test_filter_bloom_hash() {
295 let d1 = vec![0x62];
296 let d2 = vec![0xc3, 0x97];
297 let d3 = vec![0xe2, 0x99, 0xa5];
298 let d4 = vec![0xe1, 0x80, 0xb9, 0x32];
299
300 let fp = BloomPolicy::new_unwrapped(_BITS_PER_KEY);
301
302 assert_eq!(fp.bloom_hash(&d1), 0xef1345c4);
303 assert_eq!(fp.bloom_hash(&d2), 0x5b663814);
304 assert_eq!(fp.bloom_hash(&d3), 0x323c078f);
305 assert_eq!(fp.bloom_hash(&d4), 0xed21633a);
306 }
307}