#include "rocksdb/filter_policy.h"
#include "rocksdb/slice.h"
#include "table/block_based_filter_block.h"
#include "table/full_filter_block.h"
#include "util/hash.h"
#include "util/coding.h"
namespace rocksdb {
// Forward declarations of the two filter-block builder types. Neither name is
// referenced again in the portion of this file shown here.
class BlockBasedFilterBlockBuilder;
class FullFilterBlockBuilder;
namespace {
// Collects 32-bit key hashes through AddKey() and, in Finish(), turns them
// into one serialized filter buffer laid out as:
//
//   [total_bits / 8 bytes of filter bits]
//   [1 byte  : num_probes_ narrowed to char]
//   [4 bytes : num_lines, written with EncodeFixed32]
class FullFilterBitsBuilder : public FilterBitsBuilder {
 public:
  // bits_per_key must be non-zero (asserted).
  explicit FullFilterBitsBuilder(const size_t bits_per_key,
                                 const size_t num_probes)
      : bits_per_key_(bits_per_key),
        num_probes_(num_probes) {
    assert(bits_per_key_);
  }

  ~FullFilterBitsBuilder() {}

  // Records the hash of `key`, unless it equals the hash recorded by the
  // immediately preceding call (only adjacent repeats are dropped).
  virtual void AddKey(const Slice& key) override {
    const uint32_t key_hash = BloomHash(key);
    const bool repeats_previous =
        !hash_entries_.empty() && hash_entries_.back() == key_hash;
    if (!repeats_previous) {
      hash_entries_.push_back(key_hash);
    }
  }

  // Builds the filter from every recorded hash, hands ownership of the
  // allocated buffer to *buf, clears the recorded hashes, and returns a Slice
  // covering the filter bits plus the 5-byte trailer.
  virtual Slice Finish(std::unique_ptr<const char[]>* buf) override {
    uint32_t total_bits = 0;
    uint32_t num_lines = 0;
    char* const filter = ReserveSpace(static_cast<int>(hash_entries_.size()),
                                      &total_bits, &num_lines);
    assert(filter);

    // With no recorded hashes both sizes are zero and only the trailer is
    // emitted.
    const bool has_filter_bits = (total_bits != 0) && (num_lines != 0);
    if (has_filter_bits) {
      for (const uint32_t entry : hash_entries_) {
        AddHash(entry, filter, num_lines, total_bits);
      }
    }

    // Trailer: probe count byte followed by the fixed32 line count.
    const uint32_t bit_bytes = total_bits / 8;
    filter[bit_bytes] = static_cast<char>(num_probes_);
    EncodeFixed32(filter + bit_bytes + 1, static_cast<uint32_t>(num_lines));

    // reset() is given a pointer of exactly the unique_ptr's pointer type.
    const char* owned = filter;
    buf->reset(owned);
    hash_entries_.clear();

    return Slice(filter, bit_bytes + 5);
  }

 private:
  size_t bits_per_key_;
  size_t num_probes_;
  std::vector<uint32_t> hash_entries_;

  // Rounds a bit count to the filter's cache-line-based sizing.
  uint32_t GetTotalBitsForLocality(uint32_t total_bits);

  // Allocates the zeroed output buffer and reports its bit/line dimensions.
  char* ReserveSpace(const int num_entry, uint32_t* total_bits,
                     uint32_t* num_lines);

  // Sets the probe bits for one hash inside `data`.
  void AddHash(uint32_t h, char* data, uint32_t num_lines,
               uint32_t total_bits);

  // Copying is disabled: declared private and never defined.
  FullFilterBitsBuilder(const FullFilterBitsBuilder&);
  void operator=(const FullFilterBitsBuilder&);
};
// Rounds `total_bits` up to a whole number of cache lines
// (CACHE_LINE_SIZE * 8 bits each); an even line count is then raised to the
// next odd number. Returns the resulting size in bits.
uint32_t FullFilterBitsBuilder::GetTotalBitsForLocality(uint32_t total_bits) {
  // `auto` keeps whatever arithmetic type the CACHE_LINE_SIZE expression has.
  const auto bits_per_line = CACHE_LINE_SIZE * 8;

  uint32_t line_count = (total_bits + bits_per_line - 1) / bits_per_line;
  // Setting the low bit turns an even count into the next odd one and leaves
  // an odd count unchanged.
  line_count |= 1u;

  return line_count * bits_per_line;
}
// Computes the filter dimensions for `num_entry` hashes and allocates a
// zero-filled buffer for them.
//
// Outputs:
//   *total_bits - filter size in bits (0 when num_entry is 0)
//   *num_lines  - *total_bits / (CACHE_LINE_SIZE * 8) (0 when num_entry is 0)
//
// Returns a new[]-allocated buffer of (*total_bits / 8 + 5) bytes, all zero.
// The 5 extra bytes are the room Finish() uses for its trailer.
char* FullFilterBitsBuilder::ReserveSpace(const int num_entry,
                                          uint32_t* total_bits,
                                          uint32_t* num_lines) {
  assert(bits_per_key_);

  if (num_entry == 0) {
    *total_bits = 0;
    *num_lines = 0;
  } else {
    const uint32_t requested_bits =
        num_entry * static_cast<uint32_t>(bits_per_key_);
    *total_bits = GetTotalBitsForLocality(requested_bits);
    *num_lines = *total_bits / (CACHE_LINE_SIZE * 8);
    assert(*total_bits > 0 && *total_bits % 8 == 0);
  }

  const uint32_t alloc_bytes = *total_bits / 8 + 5;
  // The trailing () value-initializes the array, so every byte starts at zero.
  return new char[alloc_bytes]();
}
// Sets num_probes_ bits in `data` for hash `h`. Every probe lands inside the
// single CACHE_LINE_SIZE-byte region selected by (h % num_lines); successive
// probes advance the hash by a rotated copy of its initial value.
// `total_bits` is only checked by the assertion.
inline void FullFilterBitsBuilder::AddHash(uint32_t h, char* data,
                                           uint32_t num_lines,
                                           uint32_t total_bits) {
  assert(num_lines > 0 && total_bits > 0);

  const uint32_t step = (h >> 17) | (h << 15);
  const uint32_t line_first_bit = (h % num_lines) * (CACHE_LINE_SIZE * 8);

  for (uint32_t probe = 0; probe < num_probes_; ++probe) {
    const uint32_t bit_index = line_first_bit + (h % (CACHE_LINE_SIZE * 8));
    const uint32_t byte_index = bit_index / 8;
    const int bit_mask = 1 << (bit_index % 8);
    data[byte_index] |= bit_mask;
    h += step;
  }
}
class FullFilterBitsReader : public FilterBitsReader {
public:
explicit FullFilterBitsReader(const Slice& contents)
: data_(const_cast<char*>(contents.data())),
data_len_(static_cast<uint32_t>(contents.size())),
num_probes_(0),
num_lines_(0) {
assert(data_);
GetFilterMeta(contents, &num_probes_, &num_lines_);
if (num_lines_ != 0 && (data_len_-5) % num_lines_ != 0) {
num_lines_ = 0;
num_probes_ = 0;
}
}
~FullFilterBitsReader() {}
virtual bool MayMatch(const Slice& entry) override {
if (data_len_ <= 5) { return false;
}
if (num_probes_ == 0 || num_lines_ == 0) return true;
uint32_t hash = BloomHash(entry);
return HashMayMatch(hash, Slice(data_, data_len_),
num_probes_, num_lines_);
}
private:
char* data_;
uint32_t data_len_;
size_t num_probes_;
uint32_t num_lines_;
void GetFilterMeta(const Slice& filter, size_t* num_probes,
uint32_t* num_lines);
bool HashMayMatch(const uint32_t& hash, const Slice& filter,
const size_t& num_probes, const uint32_t& num_lines);
FullFilterBitsReader(const FullFilterBitsReader&);
void operator=(const FullFilterBitsReader&);
};
// Extracts the metadata stored in the last 5 bytes of `filter`:
//   byte [len - 5]        -> *num_probes
//   bytes [len - 4, len)  -> *num_lines (decoded with DecodeFixed32)
//
// A filter of 5 bytes or fewer has no bits in front of the trailer; both
// outputs are set to 0 in that case.
void FullFilterBitsReader::GetFilterMeta(const Slice& filter,
                                         size_t* num_probes,
                                         uint32_t* num_lines) {
  const uint32_t len = static_cast<uint32_t>(filter.size());
  if (len <= 5) {
    *num_probes = 0;
    *num_lines = 0;
    return;
  }

  const char* const trailer = filter.data() + len - 5;

  // Read the probe count as an unsigned byte. Plain `char` has
  // implementation-defined signedness, so a stored byte >= 0x80 would
  // otherwise sign-extend into an enormous size_t on signed-char platforms
  // (and read as 128..255 elsewhere), making the probe loop bound
  // platform-dependent and effectively unbounded.
  *num_probes = static_cast<unsigned char>(trailer[0]);
  *num_lines = DecodeFixed32(trailer + 1);
}
// Checks the `num_probes` bits that `hash` maps to inside `filter`.
//
// The bytes ahead of the 5-byte trailer are treated as `num_lines` equally
// sized lines; (hash % num_lines) picks the line and every probe stays within
// it. Returns false as soon as one probed bit is clear, true if all are set.
// A filter of 5 bytes or fewer never matches.
bool FullFilterBitsReader::HashMayMatch(const uint32_t& hash,
                                        const Slice& filter,
                                        const size_t& num_probes,
                                        const uint32_t& num_lines) {
  const uint32_t len = static_cast<uint32_t>(filter.size());
  if (len <= 5) {
    return false;
  }

  assert(num_probes != 0);
  assert(num_lines != 0 && (len - 5) % num_lines == 0);

  // Line width is derived from the stored data rather than from a
  // compile-time constant.
  const uint32_t bytes_per_line = (len - 5) / num_lines;
  const uint32_t bits_per_line = bytes_per_line * 8;
  const char* const bits = filter.data();

  uint32_t cursor = hash;
  const uint32_t step = (cursor >> 17) | (cursor << 15);
  const uint32_t line_first_bit = (cursor % num_lines) * bits_per_line;

  for (uint32_t probe = 0; probe < num_probes; ++probe) {
    const uint32_t bit_index = line_first_bit + (cursor % bits_per_line);
    const int bit_mask = 1 << (bit_index % 8);
    if ((bits[bit_index / 8] & bit_mask) == 0) {
      return false;
    }
    cursor += step;
  }
  return true;
}
// Bloom-filter implementation of FilterPolicy.
//
// CreateFilter()/KeyMayMatch() work on a flat filter appended to a string:
// the bit array followed by one byte holding the probe count.
// GetFilterBitsBuilder()/GetFilterBitsReader() hand out the full-filter
// builder and reader instead.
class BloomFilterPolicy : public FilterPolicy {
 public:
  explicit BloomFilterPolicy(int bits_per_key, bool use_block_based_builder)
      : bits_per_key_(bits_per_key),
        hash_func_(BloomHash),
        use_block_based_builder_(use_block_based_builder) {
    initialize();
  }

  ~BloomFilterPolicy() {}

  virtual const char* Name() const override {
    return "rocksdb.BuiltinBloomFilter";
  }

  // Appends to *dst a filter covering keys[0 .. n-1]. Existing content of
  // *dst is left in place; the new filter starts at the old end.
  virtual void CreateFilter(const Slice* keys, int n,
                            std::string* dst) const override {
    // Bit-array size: at least 64 bits, rounded up to whole bytes.
    size_t wanted_bits = n * bits_per_key_;
    if (wanted_bits < 64) {
      wanted_bits = 64;
    }
    const size_t filter_bytes = (wanted_bits + 7) / 8;
    const size_t filter_bits = filter_bytes * 8;

    const size_t start = dst->size();
    dst->resize(start + filter_bytes, 0);
    dst->push_back(static_cast<char>(num_probes_));
    // Take the pointer only after the last append, since growing the string
    // may move its storage.
    char* const bit_array = &(*dst)[start];

    const size_t key_count = static_cast<size_t>(n);
    for (size_t idx = 0; idx < key_count; ++idx) {
      uint32_t h = hash_func_(keys[idx]);
      const uint32_t step = (h >> 17) | (h << 15);
      for (size_t probe = 0; probe < num_probes_; ++probe) {
        const uint32_t bit_index = h % filter_bits;
        bit_array[bit_index / 8] |= (1 << (bit_index % 8));
        h += step;
      }
    }
  }

  // Probes `bloom_filter` (bit array + trailing probe-count byte) for `key`.
  // Filters shorter than 2 bytes never match; a stored probe count above 30
  // is always reported as a match.
  virtual bool KeyMayMatch(const Slice& key,
                           const Slice& bloom_filter) const override {
    const size_t filter_len = bloom_filter.size();
    if (filter_len < 2) {
      return false;
    }

    const char* const bytes = bloom_filter.data();
    const size_t filter_bits = (filter_len - 1) * 8;

    // The final byte is the probe count written by CreateFilter().
    const size_t probe_count = bytes[filter_len - 1];
    if (probe_count > 30) {
      return true;
    }

    uint32_t h = hash_func_(key);
    const uint32_t step = (h >> 17) | (h << 15);
    for (size_t probe = 0; probe < probe_count; ++probe) {
      const uint32_t bit_index = h % filter_bits;
      const bool bit_is_set =
          (bytes[bit_index / 8] & (1 << (bit_index % 8))) != 0;
      if (!bit_is_set) {
        return false;
      }
      h += step;
    }
    return true;
  }

  // Returns nullptr when the policy was created with
  // use_block_based_builder == true; otherwise a new full-filter builder
  // that the caller owns.
  virtual FilterBitsBuilder* GetFilterBitsBuilder() const override {
    if (!use_block_based_builder_) {
      return new FullFilterBitsBuilder(bits_per_key_, num_probes_);
    }
    return nullptr;
  }

  // Returns a new reader over `contents`; the caller owns it.
  virtual FilterBitsReader* GetFilterBitsReader(const Slice& contents)
      const override {
    return new FullFilterBitsReader(contents);
  }

  bool UseBlockBasedBuilder() { return use_block_based_builder_; }

 private:
  size_t bits_per_key_;
  size_t num_probes_;
  uint32_t (*hash_func_)(const Slice& key);
  const bool use_block_based_builder_;

  // Derives the probe count as bits_per_key_ * 0.69, truncated and clamped
  // to the range [1, 30].
  void initialize() {
    size_t probes = static_cast<size_t>(bits_per_key_ * 0.69);
    if (probes < 1) {
      probes = 1;
    }
    if (probes > 30) {
      probes = 30;
    }
    num_probes_ = probes;
  }
};
}
// Factory for the built-in Bloom filter policy. Returns a heap-allocated
// BloomFilterPolicy constructed from the two arguments; the object is created
// with `new` and not released here.
const FilterPolicy* NewBloomFilterPolicy(int bits_per_key,
                                         bool use_block_based_builder) {
  const FilterPolicy* const policy =
      new BloomFilterPolicy(bits_per_key, use_block_based_builder);
  return policy;
}
}