1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
// Copyright (c) 2021 Google LLC
//
// Use of this source code is governed by an MIT-style
// license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT.
/*!
This module provides a common interface for filter policies that can be provided at database
startup. A filter policy is an object used to create a small filter from a set of keys. These
filters are stored and consulted during reads to determine whether or not to read information from
disk. This can have significant speed savings from removing unnecessary disk seeks on database get
operations.
RainDB provides a Bloom filter based filter policy that should be adequate for most situations.
*/
use integer_encoding::FixedInt;
use std::fmt::{self, Debug};
use std::sync::Arc;
/// Trait to be implemented by filter generating structures for use with RainDB.
pub trait FilterPolicy: Debug + Send + Sync {
/**
The name of the filter policy.
The name of a filter policy is recorded on disk alongside the filter in table files. This means
that if the serialization of this filter changes in any way, the name returned by this method
must be changed. Otherwise, incompatible filters may be passed to methods of this type.
*/
fn get_name(&self) -> String;
/**
Create a filter for the set of keys provided.
The keys potentially have duplicates.
Returns a serialized version of the filter created that should be suitable for writing to a
file.
*/
fn create_filter(&self, keys: &[Vec<u8>]) -> Vec<u8>;
/**
Returns true if the `key` is in the provided filter.
The provided filter is a byte vector directly from the table file and may need to be
deserialized before being operated on.
# Invariants
1. The method must return true if the key was in the list of keys used to the create the filter.
1. The method can return true or false if the key was not on the seed list, but it should aim to
return false with a high probability.
*/
fn key_may_match(
&self,
key: &[u8],
serialized_filter: &[u8],
) -> Result<bool, FilterPolicyError>;
}
/// A Bloom filter based filter policy.
#[derive(Debug)]
pub struct BloomFilterPolicy {
/**
A sizing factor for the Bloom filter that will grow the filter capacity at the specified rate
per key inserted into the filter.
Specifically, `bits_per_key * n keys = the length of the bloom filter`.
A good value for bits per key is 10, which yields a filter with ~1% false positive rate.
*/
bits_per_key: usize,
/**
This is the number of hash functions used for insertion and checking.
The formula to get this is: `k = (m / n) * ln(2)`
where m is the number of bits in the array and n is the number of elements that will be inserted
into the Bloom filter.
*/
num_hash_functions: usize,
}
/// Public methods
impl BloomFilterPolicy {
/// Create a new instance of [`BloomFilterPolicy`].
pub fn new(bits_per_key: usize) -> Self {
// ln(2) is approximately 0.69
let mut num_hash_functions = (bits_per_key as f64 * 0.69).floor() as usize;
if num_hash_functions < 1 {
num_hash_functions = 1
} else if num_hash_functions > 30 {
// The number of hashing functions is rounded down to reduce the probing cost a little
num_hash_functions = 30
}
Self {
bits_per_key,
num_hash_functions,
}
}
}
/// Private methods
impl BloomFilterPolicy {
/**
Generates a 32-bit hash similar to the Murmur hash.
# Legacy
We use wrapping operations to emulate C++ behavior from LevelDB.
*/
fn hash(val: &[u8]) -> u32 {
let seed: u32 = 0xbc9f1d34;
let multiplier: u32 = 0xc6a4a793;
let rotation_factor: u32 = 24;
let val_length = val.len() as u32;
let mut hash: u32 = seed ^ (val_length.wrapping_mul(multiplier));
// Read and process value in groups of 4 bytes
let mut idx: usize = 0;
while idx + 4 <= val.len() {
let word = u32::decode_fixed(&val[idx..idx + 4]);
hash = hash.wrapping_add(word).wrapping_mul(multiplier);
hash ^= hash >> 16;
idx += 4;
}
// Process remaining bytes. There are at most 3 remaining since we processed in 4 byte
// chunks above.
let left_over = val.len() - idx;
let remaining_buf = &val[idx..];
if left_over == 3 {
hash = hash.wrapping_add((remaining_buf[2] as u32) << 16);
}
if left_over >= 2 {
hash = hash.wrapping_add((remaining_buf[1] as u32) << 8);
}
if left_over >= 1 {
hash = hash
.wrapping_add(remaining_buf[0] as u32)
.wrapping_mul(multiplier);
hash ^= hash >> rotation_factor;
}
hash
}
}
impl FilterPolicy for BloomFilterPolicy {
fn get_name(&self) -> String {
"RainDB.BloomFilter".to_string()
}
/**
Create a filter for the set of keys provided.
This implementation follows closely after the LevelDB implementation, particularly in it's usage
of a in-house Murmur like hash and the usage of double hashing. Double hashing was proposed in a
paper by [Kirsch and Mitzenmacher] where by two hashes are composed to simulate additional hash
functions.
However, the results of Kirsh and Mitzenmacher's research may have been accepted too naively.
According to [research by Peter Dillinger], the K&M result is an asymptotic result and does not
necessarily represent real-world patterns. It was found in RocksDB's similar Bloom filter that
the false positive rate could hit a limit and not go below 0.1% false positives.
The returned byte buffer has the following layout:
1. 1-byte representing the number of hash functions used (a.k.a. probers)
1. Bit vector representing the filter
Returns a serialized version of the filter created that should be suitable for writing to a
file.
[Kirsch and Mitzenmacher]: https://www.eecs.harvard.edu/~michaelm/postscripts/tr-02-05.pdf
[research by Peter Dillinger]: https://github.com/facebook/rocksdb/issues/4120#issuecomment-753771906
*/
fn create_filter(&self, keys: &[Vec<u8>]) -> Vec<u8> {
let num_keys = keys.len();
// Compute Bloom filter size
let mut filter_size_bits = num_keys * self.bits_per_key;
if filter_size_bits < 64 {
// There can be a high false positive rate when there are too little bits, so we
// enforce a minimum length.
filter_size_bits = 64;
}
// The additional 7 is to make the division by 8 round up instead of down for integer
// division.
let filter_size_bytes = (filter_size_bits + 7) / 8;
// The initial calculation for the number of bits needed was an exact prescription but we
// need to deal with byte arrays, so we round up based on the calculation above.
filter_size_bits = filter_size_bytes * 8;
let mut hashes: Vec<u8> = vec![0; filter_size_bytes];
for key in keys {
let mut hash = BloomFilterPolicy::hash(key);
// Use double-hashing to generate a sequence of hash values
// The sequence of hashes is generated by adding a delta component of the initial hash
// rotated 17 bits.
let delta: u32 = (hash >> 17) | (hash << 15);
for _ in 0..self.num_hash_functions {
let overall_bit_offset: u32 = hash % (filter_size_bits as u32);
let byte_offset: usize = (overall_bit_offset / 8) as usize;
let bit_offset_in_byte = overall_bit_offset % 8;
hashes[byte_offset] |= 1 << bit_offset_in_byte;
hash = hash.wrapping_add(delta);
}
}
[vec![self.num_hash_functions as u8], hashes].concat()
}
/**
Returns true if the `key` is in the the provided filter.
The provided byte buffer must have the following layout:
1. 1-byte representing the number of hash functions used (a.k.a. probers)
1. Bit vector representing the filter
# Invariants
1. The method must return true if the key was in the list of keys used to the create the filter.
1. The method can return true or false if the key was not on the seed list, but it should aim to
return false with a high probability.
*/
fn key_may_match(
&self,
key: &[u8],
serialized_filter: &[u8],
) -> Result<bool, FilterPolicyError> {
// The filter is invalid. There should always be one element denoting the number of probers
// used per key.
if serialized_filter.len() < 2 {
return Err(FilterPolicyError::Parse(
"The provided filter does not have the proper format for deserialization."
.to_string(),
));
}
let (num_hash_functions, bloom_filter) = serialized_filter.split_first().unwrap();
let filter_length_bits = bloom_filter.len() * 8;
if *num_hash_functions != (self.num_hash_functions as u8) {
log::warn!("The provided filter has an unexpected number of hash functions. Found {} but expected {}. This is not a breaking issue but should be considered an anomoly.",
num_hash_functions,
self.num_hash_functions
);
}
let mut hash = BloomFilterPolicy::hash(key);
// Use double-hashing to generate a sequence of hash values
// The sequence of hashes is generated by adding a delta component of the initial hash
// rotated 17 bits.
let delta: u32 = (hash >> 17) | (hash << 15);
for _ in 0..*num_hash_functions {
let overall_bit_offset = hash % filter_length_bits as u32;
let byte_offset: usize = (overall_bit_offset / 8) as usize;
let bit_offset_in_byte = overall_bit_offset % 8;
if bloom_filter[byte_offset] & (1 << bit_offset_in_byte) == 0 {
return Ok(false);
}
hash = hash.wrapping_add(delta);
}
Ok(true)
}
}
/// Filter policy errors.
#[derive(Debug)]
pub enum FilterPolicyError {
/// Variant for errors deserializing a filter.
Parse(String),
}
impl std::error::Error for FilterPolicyError {}
impl fmt::Display for FilterPolicyError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
FilterPolicyError::Parse(base_err) => write!(f, "{}", base_err),
}
}
}
/// A prefix for filter block keys.
const FILTER_BLOCK_KEY_PREFIX: &str = "filter";
/// Get the serialized name for a filter block in a table file.
pub(crate) fn get_filter_block_name(filter_policy: Arc<dyn FilterPolicy>) -> String {
FILTER_BLOCK_KEY_PREFIX.to_string() + "." + &filter_policy.get_name()
}
#[cfg(test)]
mod bloom_filter_tests {
use super::*;
use pretty_assertions::assert_eq;
#[test]
fn hash_function_produces_expected_hashes() {
// Test values from LevelDB hash_test.cc to check for compatibility
let val1 = [0x62];
let val2 = [0xc3, 0x97];
let val3 = [0xe2, 0x99, 0xa5];
let val4 = [0xe1, 0x80, 0xb9, 0x32];
assert_eq!(BloomFilterPolicy::hash(&[]), 0xbc9f1d34);
assert_eq!(BloomFilterPolicy::hash(&val1), 0xef1345c4);
assert_eq!(BloomFilterPolicy::hash(&val2), 0x5b663814);
assert_eq!(BloomFilterPolicy::hash(&val3), 0x323c078f);
assert_eq!(BloomFilterPolicy::hash(&val4), 0xed21633a);
}
#[test]
fn with_an_empty_filter_returns_false_for_any_request() {
let bloom_filter_policy = BloomFilterPolicy::new(10);
let filter = bloom_filter_policy.create_filter(&[]);
assert!(!bloom_filter_policy
.key_may_match(b"hello", filter.as_slice())
.unwrap());
assert!(!bloom_filter_policy
.key_may_match(b"world", filter.as_slice())
.unwrap());
}
#[test]
fn with_an_small_filter_returns_expected_responses() {
let bloom_filter_policy = BloomFilterPolicy::new(10);
let keys = [b"hello".to_vec(), b"world".to_vec()];
let filter = bloom_filter_policy.create_filter(&keys);
assert!(bloom_filter_policy
.key_may_match(b"hello", filter.as_slice())
.unwrap());
assert!(bloom_filter_policy
.key_may_match(b"world", filter.as_slice())
.unwrap());
assert!(!bloom_filter_policy
.key_may_match(b"batmann", filter.as_slice())
.unwrap());
assert!(!bloom_filter_policy
.key_may_match(b"robin", filter.as_slice())
.unwrap());
}
#[test]
fn bloom_filters_are_created_with_acceptable_false_positive_rates_for_varying_lengths() {
const BITS_PER_KEY: usize = 10;
let bloom_filter_policy = BloomFilterPolicy::new(BITS_PER_KEY);
// The number of filters that significantly exceed the false positive rate
let mut num_mediocre_filters: usize = 0;
let mut num_good_filters: usize = 0;
let mut num_keys: usize = 1;
while num_keys <= 10_000 {
// Build up keys we want to build the filter for
let mut keys: Vec<Vec<u8>> = Vec::with_capacity(num_keys);
for idx in 0..num_keys {
keys.push(u32::encode_fixed_vec(idx as u32));
}
let filter = bloom_filter_policy.create_filter(&keys);
// Calculate filter size based on `BITS_PER_KEY`
let filter_size: usize = ((num_keys * 10) / 8) + 40;
assert!(filter.len() <= filter_size);
// All added keys must be found in the filter
for idx in 0..num_keys {
let key = u32::encode_fixed_vec(idx as u32);
assert!(
bloom_filter_policy.key_may_match(&key, &filter).unwrap(),
"Expected to find {idx} in the filter."
);
}
// Check that false positive rate is within acceptable bounds i.e. at most 2%
let false_positive_rate = check_false_positive_rate(&bloom_filter_policy, &filter);
assert!(
false_positive_rate <= 0.02,
"The false positive rate exceeded 2%. Num keys {num_keys} had a false positive \
rate of {false_positive_rate}."
);
if false_positive_rate > 0.0125 {
num_mediocre_filters += 1;
} else {
num_good_filters += 1;
}
num_keys = next_num_keys(num_keys);
}
println!(
"There are {num_mediocre_filters} mediocre filters and {num_good_filters} good \
filters."
);
assert!(
num_mediocre_filters <= (num_good_filters / 5),
"If there are any mediocre filters, there must be at least 5 times as many good \
filters (i.e. filters that have < 1.25% false positive rate)."
);
}
/// Get the next filter length for the false positive test.
fn next_num_keys(prev_num_keys: usize) -> usize {
let mut num_keys = prev_num_keys;
if num_keys < 10 {
num_keys += 1
} else if num_keys < 100 {
num_keys += 10;
} else if num_keys < 1000 {
num_keys += 100;
} else {
num_keys += 1000;
}
num_keys
}
fn check_false_positive_rate(policy: &BloomFilterPolicy, filter: &[u8]) -> f64 {
const NUM_ITERATIONS: usize = 10_000;
let mut matches: f64 = 0.0;
for idx in 0..NUM_ITERATIONS {
if policy
.key_may_match(&u32::encode_fixed_vec(idx as u32 + 1_000_000_000), filter)
.unwrap()
{
matches += 1.0;
}
}
matches / (NUM_ITERATIONS as f64)
}
}