use crate::FilterPolicy;
pub(crate) fn hash(data: &[u8], seed: u32) -> u32 {
const M: u32 = 0xc6a4_a793;
const R: u32 = 24;
let mut h = seed ^ ((data.len() as u64) * M as u64) as u32;
let mut i = 0;
while i + 4 <= data.len() {
let chunk = unsafe {
let ptr = data.as_ptr().add(i) as *const u32;
ptr.read_unaligned().to_be().to_be_bytes()
};
h = h.wrapping_add(u32::from_be_bytes(chunk));
h = h.wrapping_mul(M);
h ^= h >> 16;
i += 4;
}
match data.len() - i {
3 => {
h = h.wrapping_add((data[i + 2] as u32) << 16);
h = h.wrapping_add((data[i + 1] as u32) << 8);
h = h.wrapping_add(data[i] as u32);
}
2 => {
h = h.wrapping_add((data[i + 1] as u32) << 8);
h = h.wrapping_add(data[i] as u32);
}
1 => {
h = h.wrapping_add(data[i] as u32);
}
_ => {}
}
h = h.wrapping_mul(M);
h ^= h >> R;
h
}
pub(crate) struct LevelDBBloomFilter {
bits_per_key: usize,
}
impl LevelDBBloomFilter {
pub(crate) fn new(bits_per_key: usize) -> Self {
Self {
bits_per_key,
}
}
fn bloom_hash(key: &[u8]) -> u32 {
hash(key, 0xbc9f_1d34)
}
}
impl FilterPolicy for LevelDBBloomFilter {
fn name(&self) -> &str {
"leveldb.BloomFilter"
}
fn create_filter(&self, keys: &[Vec<u8>]) -> Vec<u8> {
let n = keys.len();
if n == 0 {
return vec![];
}
let bits = n * self.bits_per_key;
let bytes = bits.div_ceil(8);
let bits = bytes * 8;
let mut filter = vec![0u8; bytes + 1];
let k = (((self.bits_per_key as f64) * 0.7) as u32).clamp(1, 30);
for key in keys {
let h = Self::bloom_hash(key);
let delta = h.rotate_left(15);
let mut hash = h;
for _ in 0..k {
let bit_pos = (hash % (bits as u32)) as usize;
filter[bit_pos / 8] |= 1 << (bit_pos % 8);
hash = hash.wrapping_add(delta);
}
}
filter[bytes] = k as u8;
filter
}
fn may_contain(&self, filter: &[u8], key: &[u8]) -> bool {
let bytes = filter.len();
if bytes < 2 {
return false;
}
let k = filter[bytes - 1] as u32;
if k > 30 {
return true;
}
let bits = (bytes - 1) * 8;
let h = Self::bloom_hash(key);
let delta = h.rotate_left(15);
let mut hash = h;
for _ in 0..k {
let bit_pos = (hash % (bits as u32)) as usize;
if (filter[bit_pos / 8] & (1 << (bit_pos % 8))) == 0 {
return false;
}
hash = hash.wrapping_add(delta);
}
true
}
}
#[cfg(test)]
mod tests {
use test_log::test;
use super::*;
#[test]
fn test_bloom_filter_creation() {
let filter = LevelDBBloomFilter::new(10);
assert_eq!(filter.bits_per_key, 10);
let filter = LevelDBBloomFilter::new(100);
assert_eq!(filter.bits_per_key, 100);
let filter = LevelDBBloomFilter::new(1);
assert_eq!(filter.bits_per_key, 1);
}
#[test]
fn test_bloom_filter_may_contain() {
let filter = LevelDBBloomFilter::new(10);
let keys = vec![b"key1".to_vec(), b"key2".to_vec(), b"key3".to_vec()];
let bloom_filter = filter.create_filter(&keys);
assert!(filter.may_contain(&bloom_filter, b"key1"));
assert!(filter.may_contain(&bloom_filter, b"key2"));
assert!(filter.may_contain(&bloom_filter, b"key3"));
assert!(!filter.may_contain(&bloom_filter, b"key4"));
}
#[test]
fn test_bloom_filter_empty() {
let filter = LevelDBBloomFilter::new(10);
let empty_filter = vec![];
assert!(!filter.may_contain(&empty_filter, b"key1"));
}
#[test]
fn test_bloom_filter_invalid_num_hashes() {
let filter = LevelDBBloomFilter::new(10);
let mut bloom_filter = filter.create_filter(&[b"key1".to_vec()]);
let bloom_filter_len = bloom_filter.len();
bloom_filter[bloom_filter_len - 1] = 31; assert!(filter.may_contain(&bloom_filter, b"key1"));
}
}