#ifndef GFLAGS
#include <cstdio>
int main() {
fprintf(stderr, "Please install gflags to run this test... Skipping...\n");
return 0;
}
#else
#include <array>
#include <cmath>
#include <vector>
#include "cache/cache_entry_roles.h"
#include "cache/cache_reservation_manager.h"
#include "memory/arena.h"
#include "port/jemalloc_helper.h"
#include "rocksdb/convenience.h"
#include "rocksdb/filter_policy.h"
#include "table/block_based/filter_policy_internal.h"
#include "test_util/testharness.h"
#include "test_util/testutil.h"
#include "util/gflags_compat.h"
#include "util/hash.h"
using GFLAGS_NAMESPACE::ParseCommandLineFlags;
DEFINE_int32(bits_per_key, 10, "");
namespace ROCKSDB_NAMESPACE {
namespace {
const std::string kLegacyBloom = test::LegacyBloomFilterPolicy::kClassName();
const std::string kFastLocalBloom =
test::FastLocalBloomFilterPolicy::kClassName();
const std::string kStandard128Ribbon =
test::Standard128RibbonFilterPolicy::kClassName();
}
static const int kVerbose = 1;
static Slice Key(int i, char* buffer) {
std::string s;
PutFixed32(&s, static_cast<uint32_t>(i));
memcpy(buffer, s.c_str(), sizeof(i));
return Slice(buffer, sizeof(i));
}
static int NextLength(int length) {
if (length < 10) {
length += 1;
} else if (length < 100) {
length += 10;
} else if (length < 1000) {
length += 100;
} else {
length += 1000;
}
return length;
}
class FullBloomTest : public testing::TestWithParam<std::string> {
protected:
BlockBasedTableOptions table_options_;
private:
std::shared_ptr<const FilterPolicy>& policy_;
std::unique_ptr<FilterBitsBuilder> bits_builder_;
std::unique_ptr<FilterBitsReader> bits_reader_;
std::unique_ptr<const char[]> buf_;
size_t filter_size_;
public:
FullBloomTest() : policy_(table_options_.filter_policy), filter_size_(0) {
ResetPolicy();
}
FilterBitsBuilder* GetFilterBitsBuilder() { return bits_builder_.get(); }
const BloomLikeFilterPolicy* GetBloomLikeFilterPolicy() {
return &dynamic_cast<const BloomLikeFilterPolicy&>(*policy_);
}
void Reset() {
bits_builder_.reset(BloomFilterPolicy::GetBuilderFromContext(
FilterBuildingContext(table_options_)));
bits_reader_.reset(nullptr);
buf_.reset(nullptr);
filter_size_ = 0;
}
void ResetPolicy(double bits_per_key) {
policy_ = BloomLikeFilterPolicy::Create(GetParam(), bits_per_key);
Reset();
}
void ResetPolicy() { ResetPolicy(FLAGS_bits_per_key); }
void Add(const Slice& s) { bits_builder_->AddKey(s); }
void OpenRaw(const Slice& s) {
bits_reader_.reset(policy_->GetFilterBitsReader(s));
}
void Build() {
Slice filter = bits_builder_->Finish(&buf_);
bits_reader_.reset(policy_->GetFilterBitsReader(filter));
filter_size_ = filter.size();
}
size_t FilterSize() const { return filter_size_; }
Slice FilterData() { return Slice(buf_.get(), filter_size_); }
int GetNumProbesFromFilterData() {
assert(filter_size_ >= 5);
int8_t raw_num_probes = static_cast<int8_t>(buf_.get()[filter_size_ - 5]);
if (raw_num_probes == -1) { return static_cast<uint8_t>(buf_.get()[filter_size_ - 3]);
} else {
return raw_num_probes;
}
}
int GetRibbonSeedFromFilterData() {
assert(filter_size_ >= 5);
assert(-2 == static_cast<int8_t>(buf_.get()[filter_size_ - 5]));
return static_cast<uint8_t>(buf_.get()[filter_size_ - 4]);
}
bool Matches(const Slice& s) {
if (bits_reader_ == nullptr) {
Build();
}
return bits_reader_->MayMatch(s);
}
uint64_t PackedMatches() {
char buffer[sizeof(int)];
uint64_t result = 0;
for (int i = 0; i < 64; i++) {
if (Matches(Key(i + 12345, buffer))) {
result |= uint64_t{1} << i;
}
}
return result;
}
std::string FirstFPs(int count) {
char buffer[sizeof(int)];
std::string rv;
int fp_count = 0;
for (int i = 0; i < 1000000; i++) {
if (Matches(Key(i + 1000000, buffer))) {
++fp_count;
rv += std::to_string(i);
if (fp_count == count) {
break;
}
rv += ',';
}
}
return rv;
}
double FalsePositiveRate() {
char buffer[sizeof(int)];
int result = 0;
for (int i = 0; i < 10000; i++) {
if (Matches(Key(i + 1000000000, buffer))) {
result++;
}
}
return result / 10000.0;
}
};
TEST_P(FullBloomTest, FilterSize) {
bool some_computed_less_than_denoted = false;
for (auto bpk : std::vector<std::pair<double, int> >{{-HUGE_VAL, 0},
{-INFINITY, 0},
{0.0, 0},
{0.499, 0},
{0.5, 1000},
{1.234, 1234},
{3.456, 3456},
{9.5, 9500},
{10.0, 10000},
{10.499, 10499},
{21.345, 21345},
{99.999, 99999},
{1234.0, 100000},
{HUGE_VAL, 100000},
{INFINITY, 100000},
{NAN, 100000}}) {
ResetPolicy(bpk.first);
auto bfp = GetBloomLikeFilterPolicy();
EXPECT_EQ(bpk.second, bfp->GetMillibitsPerKey());
EXPECT_EQ((bpk.second + 500) / 1000, bfp->GetWholeBitsPerKey());
double computed = bpk.first;
computed += 0.5;
computed /= 1234567.0;
computed *= 1234567.0;
computed -= 0.5;
some_computed_less_than_denoted |= (computed < bpk.first);
ResetPolicy(computed);
bfp = GetBloomLikeFilterPolicy();
EXPECT_EQ(bpk.second, bfp->GetMillibitsPerKey());
EXPECT_EQ((bpk.second + 500) / 1000, bfp->GetWholeBitsPerKey());
auto bits_builder = GetFilterBitsBuilder();
if (bpk.second == 0) {
ASSERT_EQ(bits_builder, nullptr);
continue;
}
size_t n = 1;
size_t space = 0;
for (; n < 1000000; n += 1 + n / 1000) {
space = bits_builder->CalculateSpace(n);
size_t n2 = bits_builder->ApproximateNumEntries(space);
EXPECT_GE(n2, n);
size_t space2 = bits_builder->CalculateSpace(n2);
if (n > 12000 && GetParam() == kStandard128Ribbon) {
EXPECT_GE(space2, space);
EXPECT_LE(space2 * 0.998, space * 1.0);
} else {
EXPECT_EQ(space2, space);
}
}
for (; n < (n + n / 3); n += n / 3) {
size_t space2 = bits_builder->CalculateSpace(n);
EXPECT_GE(space2, space);
space = space2;
}
}
EXPECT_TRUE(some_computed_less_than_denoted);
ResetPolicy();
}
TEST_P(FullBloomTest, FullEmptyFilter) {
ASSERT_TRUE(!Matches("hello"));
ASSERT_TRUE(!Matches("world"));
}
TEST_P(FullBloomTest, FullSmall) {
Add("hello");
Add("world");
ASSERT_TRUE(Matches("hello"));
ASSERT_TRUE(Matches("world"));
ASSERT_TRUE(!Matches("x"));
ASSERT_TRUE(!Matches("foo"));
}
TEST_P(FullBloomTest, FullVaryingLengths) {
table_options_.optimize_filters_for_memory = false;
char buffer[sizeof(int)];
int mediocre_filters = 0;
int good_filters = 0;
for (int length = 1; length <= 10000; length = NextLength(length)) {
Reset();
for (int i = 0; i < length; i++) {
Add(Key(i, buffer));
}
Build();
EXPECT_LE(FilterSize(), (size_t)((length * FLAGS_bits_per_key / 8) +
CACHE_LINE_SIZE * 2 + 5));
for (int i = 0; i < length; i++) {
ASSERT_TRUE(Matches(Key(i, buffer)))
<< "Length " << length << "; key " << i;
}
double rate = FalsePositiveRate();
if (kVerbose >= 1) {
fprintf(stderr, "False positives: %5.2f%% @ length = %6d ; bytes = %6d\n",
rate * 100.0, length, static_cast<int>(FilterSize()));
}
if (FLAGS_bits_per_key == 10) {
EXPECT_LE(rate, 0.02); if (rate > 0.0125) {
mediocre_filters++; } else {
good_filters++;
}
}
}
if (kVerbose >= 1) {
fprintf(stderr, "Filters: %d good, %d mediocre\n", good_filters,
mediocre_filters);
}
EXPECT_LE(mediocre_filters, good_filters / 5);
}
TEST_P(FullBloomTest, OptimizeForMemory) {
EXPECT_EQ(BlockBasedTableOptions().optimize_filters_for_memory, true);
char buffer[sizeof(int)];
for (bool offm : {true, false}) {
table_options_.optimize_filters_for_memory = offm;
ResetPolicy();
Random32 rnd(12345);
uint64_t total_size = 0;
uint64_t total_mem = 0;
int64_t total_keys = 0;
double total_fp_rate = 0;
constexpr int nfilters = 100;
for (int i = 0; i < nfilters; ++i) {
int nkeys = static_cast<int>(rnd.Uniformish(10000)) + 100;
Reset();
for (int j = 0; j < nkeys; ++j) {
Add(Key(j, buffer));
}
Build();
size_t size = FilterData().size();
total_size += size;
#ifdef ROCKSDB_MALLOC_USABLE_SIZE
size = malloc_usable_size(const_cast<char*>(FilterData().data()));
#endif total_mem += size;
total_keys += nkeys;
total_fp_rate += FalsePositiveRate();
}
if (FLAGS_bits_per_key == 10) {
EXPECT_LE(total_fp_rate / double{nfilters}, 0.011);
EXPECT_GE(total_fp_rate / double{nfilters},
CACHE_LINE_SIZE >= 256 ? 0.007 : 0.008);
}
int64_t ex_min_total_size = int64_t{FLAGS_bits_per_key} * total_keys / 8;
if (GetParam() == kStandard128Ribbon) {
ex_min_total_size = 7 * ex_min_total_size / 10;
}
EXPECT_GE(static_cast<int64_t>(total_size), ex_min_total_size);
int64_t blocked_bloom_overhead = nfilters * (CACHE_LINE_SIZE + 5);
if (GetParam() == kLegacyBloom) {
blocked_bloom_overhead += nfilters * CACHE_LINE_SIZE;
}
EXPECT_GE(total_mem, total_size);
if (offm && GetParam() != kLegacyBloom) {
fprintf(stderr, "Internal fragmentation (optimized): %g%%\n",
(total_mem - total_size) * 100.0 / total_size);
EXPECT_LE(total_mem, total_size * 101 / 100);
EXPECT_LE(static_cast<int64_t>(total_size),
ex_min_total_size * 102 / 100 + blocked_bloom_overhead);
} else {
fprintf(stderr, "Internal fragmentation (not optimized): %g%%\n",
(total_mem - total_size) * 100.0 / total_size);
#ifdef ROCKSDB_JEMALLOC
fprintf(stderr, "Jemalloc detected? %d\n", HasJemalloc());
if (HasJemalloc()) {
#ifdef ROCKSDB_MALLOC_USABLE_SIZE
EXPECT_GE(total_mem, total_size * 105 / 100);
#endif }
#endif EXPECT_LE(static_cast<int64_t>(total_size),
ex_min_total_size + blocked_bloom_overhead);
}
}
}
class ChargeFilterConstructionTest : public testing::Test {};
TEST_F(ChargeFilterConstructionTest, RibbonFilterFallBackOnLargeBanding) {
constexpr std::size_t kCacheCapacity =
8 * CacheReservationManagerImpl<
CacheEntryRole::kFilterConstruction>::GetDummyEntrySize();
constexpr std::size_t num_entries_for_cache_full = kCacheCapacity / 8;
for (CacheEntryRoleOptions::Decision charge_filter_construction_mem :
{CacheEntryRoleOptions::Decision::kEnabled,
CacheEntryRoleOptions::Decision::kDisabled}) {
bool will_fall_back = charge_filter_construction_mem ==
CacheEntryRoleOptions::Decision::kEnabled;
BlockBasedTableOptions table_options;
table_options.cache_usage_options.options_overrides.insert(
{CacheEntryRole::kFilterConstruction,
{ charge_filter_construction_mem}});
LRUCacheOptions lo;
lo.capacity = kCacheCapacity;
lo.num_shard_bits = 0; lo.strict_capacity_limit = true;
std::shared_ptr<Cache> cache(NewLRUCache(lo));
table_options.block_cache = cache;
table_options.filter_policy =
BloomLikeFilterPolicy::Create(kStandard128Ribbon, FLAGS_bits_per_key);
FilterBuildingContext ctx(table_options);
std::unique_ptr<FilterBitsBuilder> filter_bits_builder(
table_options.filter_policy->GetBuilderWithContext(ctx));
char key_buffer[sizeof(int)];
for (std::size_t i = 0; i < num_entries_for_cache_full; ++i) {
filter_bits_builder->AddKey(Key(static_cast<int>(i), key_buffer));
}
std::unique_ptr<const char[]> buf;
Slice filter = filter_bits_builder->Finish(&buf);
if (will_fall_back) {
EXPECT_EQ(filter.data()[filter.size() - 5], static_cast<char>(-1));
} else {
EXPECT_EQ(filter.data()[filter.size() - 5], static_cast<char>(-2));
}
if (charge_filter_construction_mem ==
CacheEntryRoleOptions::Decision::kEnabled) {
const size_t dummy_entry_num = static_cast<std::size_t>(std::ceil(
filter.size() * 1.0 /
CacheReservationManagerImpl<
CacheEntryRole::kFilterConstruction>::GetDummyEntrySize()));
EXPECT_GE(
cache->GetPinnedUsage(),
dummy_entry_num *
CacheReservationManagerImpl<
CacheEntryRole::kFilterConstruction>::GetDummyEntrySize());
EXPECT_LT(
cache->GetPinnedUsage(),
(dummy_entry_num + 1) *
CacheReservationManagerImpl<
CacheEntryRole::kFilterConstruction>::GetDummyEntrySize());
} else {
EXPECT_EQ(cache->GetPinnedUsage(), 0);
}
}
}
namespace {
inline uint32_t SelectByCacheLineSize(uint32_t for64, uint32_t for128,
uint32_t for256) {
(void)for64;
(void)for128;
(void)for256;
#if CACHE_LINE_SIZE == 64
return for64;
#elif CACHE_LINE_SIZE == 128
return for128;
#elif CACHE_LINE_SIZE == 256
return for256;
#else
#error "CACHE_LINE_SIZE unknown or unrecognized"
#endif
}
}
TEST_P(FullBloomTest, Schema) {
table_options_.optimize_filters_for_memory = false;
#define EXPECT_EQ_Bloom(a, b) \
{ \
if (GetParam() != kStandard128Ribbon) { \
EXPECT_EQ(a, b); \
} \
}
#define EXPECT_EQ_Ribbon(a, b) \
{ \
if (GetParam() == kStandard128Ribbon) { \
EXPECT_EQ(a, b); \
} \
}
#define EXPECT_EQ_FastBloom(a, b) \
{ \
if (GetParam() == kFastLocalBloom) { \
EXPECT_EQ(a, b); \
} \
}
#define EXPECT_EQ_LegacyBloom(a, b) \
{ \
if (GetParam() == kLegacyBloom) { \
EXPECT_EQ(a, b); \
} \
}
#define EXPECT_EQ_NotLegacy(a, b) \
{ \
if (GetParam() != kLegacyBloom) { \
EXPECT_EQ(a, b); \
} \
}
char buffer[sizeof(int)];
ResetPolicy(5); for (int key = 0; key < 87; key++) {
Add(Key(key, buffer));
}
Build();
EXPECT_EQ(GetNumProbesFromFilterData(), 3);
EXPECT_EQ_NotLegacy(BloomHash(FilterData()), 4130687756U);
EXPECT_EQ_NotLegacy("31,38,40,43,61,83,86,112,125,131", FirstFPs(10));
ResetPolicy(2); for (int key = 0; key < 2087; key++) {
Add(Key(key, buffer));
}
Build();
EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 1);
EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61);
EXPECT_EQ_LegacyBloom(
BloomHash(FilterData()),
SelectByCacheLineSize(1567096579, 1964771444, 2659542661U));
EXPECT_EQ_FastBloom(BloomHash(FilterData()), 3817481309U);
EXPECT_EQ_Ribbon(BloomHash(FilterData()), 1705851228U);
EXPECT_EQ_FastBloom("11,13,17,25,29,30,35,37,45,53", FirstFPs(10));
EXPECT_EQ_Ribbon("3,8,10,17,19,20,23,28,31,32", FirstFPs(10));
ResetPolicy(3); for (int key = 0; key < 2087; key++) {
Add(Key(key, buffer));
}
Build();
EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 2);
EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61);
EXPECT_EQ_LegacyBloom(
BloomHash(FilterData()),
SelectByCacheLineSize(2707206547U, 2571983456U, 218344685));
EXPECT_EQ_FastBloom(BloomHash(FilterData()), 2807269961U);
EXPECT_EQ_Ribbon(BloomHash(FilterData()), 1095342358U);
EXPECT_EQ_FastBloom("4,15,17,24,27,28,29,53,63,70", FirstFPs(10));
EXPECT_EQ_Ribbon("3,17,20,28,32,33,36,43,49,54", FirstFPs(10));
ResetPolicy(5); for (int key = 0; key < 2087; key++) {
Add(Key(key, buffer));
}
Build();
EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 3);
EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61);
EXPECT_EQ_LegacyBloom(
BloomHash(FilterData()),
SelectByCacheLineSize(515748486, 94611728, 2436112214U));
EXPECT_EQ_FastBloom(BloomHash(FilterData()), 204628445U);
EXPECT_EQ_Ribbon(BloomHash(FilterData()), 3971337699U);
EXPECT_EQ_FastBloom("15,24,29,39,53,87,89,100,103,104", FirstFPs(10));
EXPECT_EQ_Ribbon("3,33,36,43,67,70,76,78,84,102", FirstFPs(10));
ResetPolicy(8); for (int key = 0; key < 2087; key++) {
Add(Key(key, buffer));
}
Build();
EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 5);
EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61);
EXPECT_EQ_LegacyBloom(
BloomHash(FilterData()),
SelectByCacheLineSize(1302145999, 2811644657U, 756553699));
EXPECT_EQ_FastBloom(BloomHash(FilterData()), 355564975U);
EXPECT_EQ_Ribbon(BloomHash(FilterData()), 3651449053U);
EXPECT_EQ_FastBloom("16,60,66,126,220,238,244,256,265,287", FirstFPs(10));
EXPECT_EQ_Ribbon("33,187,203,296,300,322,411,419,547,582", FirstFPs(10));
ResetPolicy(9); for (int key = 0; key < 2087; key++) {
Add(Key(key, buffer));
}
Build();
EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 6);
EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61);
EXPECT_EQ_LegacyBloom(
BloomHash(FilterData()),
SelectByCacheLineSize(2092755149, 661139132, 1182970461));
EXPECT_EQ_FastBloom(BloomHash(FilterData()), 2137566013U);
EXPECT_EQ_Ribbon(BloomHash(FilterData()), 1005676675U);
EXPECT_EQ_FastBloom("156,367,791,872,945,1015,1139,1159,1265", FirstFPs(9));
EXPECT_EQ_Ribbon("33,187,203,296,411,419,604,612,615,619", FirstFPs(10));
ResetPolicy(11); for (int key = 0; key < 2087; key++) {
Add(Key(key, buffer));
}
Build();
EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 7);
EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61);
EXPECT_EQ_LegacyBloom(
BloomHash(FilterData()),
SelectByCacheLineSize(3755609649U, 1812694762, 1449142939));
EXPECT_EQ_FastBloom(BloomHash(FilterData()), 2561502687U);
EXPECT_EQ_Ribbon(BloomHash(FilterData()), 3129900846U);
EXPECT_EQ_FastBloom("34,74,130,236,643,882,962,1015,1035,1110", FirstFPs(10));
EXPECT_EQ_Ribbon("411,419,623,665,727,794,955,1052,1323,1330", FirstFPs(10));
ResetPolicy(14); for (int key = 0; key < 2087; key++) {
Add(Key(key, buffer));
}
Build();
EXPECT_EQ_LegacyBloom(GetNumProbesFromFilterData(), 9);
EXPECT_EQ_FastBloom(GetNumProbesFromFilterData(), 8);
EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61);
EXPECT_EQ_LegacyBloom(
BloomHash(FilterData()),
SelectByCacheLineSize(178861123, 379087593, 2574136516U));
EXPECT_EQ_FastBloom(BloomHash(FilterData()), 3709876890U);
EXPECT_EQ_Ribbon(BloomHash(FilterData()), 1855638875U);
EXPECT_EQ_FastBloom("130,240,522,565,989,2002,2526,3147,3543", FirstFPs(9));
EXPECT_EQ_Ribbon("665,727,1323,1755,3866,4232,4442,4492,4736", FirstFPs(9));
ResetPolicy(16); for (int key = 0; key < 2087; key++) {
Add(Key(key, buffer));
}
Build();
EXPECT_EQ_LegacyBloom(GetNumProbesFromFilterData(), 11);
EXPECT_EQ_FastBloom(GetNumProbesFromFilterData(), 9);
EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61);
EXPECT_EQ_LegacyBloom(
BloomHash(FilterData()),
SelectByCacheLineSize(1129406313, 3049154394U, 1727750964));
EXPECT_EQ_FastBloom(BloomHash(FilterData()), 1087138490U);
EXPECT_EQ_Ribbon(BloomHash(FilterData()), 459379967U);
EXPECT_EQ_FastBloom("3299,3611,3916,6620,7822,8079,8482,8942", FirstFPs(8));
EXPECT_EQ_Ribbon("727,1323,1755,4442,4736,5386,6974,7154,8222", FirstFPs(9));
ResetPolicy(10); for (int key = 0; key < 2087; key++) {
Add(Key(key, buffer));
}
Build();
EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 6);
EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61);
EXPECT_EQ_LegacyBloom(
BloomHash(FilterData()),
SelectByCacheLineSize(1478976371, 2910591341U, 1182970461));
EXPECT_EQ_FastBloom(BloomHash(FilterData()), 2498541272U);
EXPECT_EQ_Ribbon(BloomHash(FilterData()), 1273231667U);
EXPECT_EQ_FastBloom("16,126,133,422,466,472,813,1002,1035", FirstFPs(9));
EXPECT_EQ_Ribbon("296,411,419,612,619,623,630,665,686,727", FirstFPs(10));
ResetPolicy(10);
for (int key = 1; key < 2087; key++) {
Add(Key(key, buffer));
}
Build();
EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 6);
EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 184);
EXPECT_EQ_LegacyBloom(
BloomHash(FilterData()),
SelectByCacheLineSize(4205696321U, 1132081253U, 2385981855U));
EXPECT_EQ_FastBloom(BloomHash(FilterData()), 2058382345U);
EXPECT_EQ_Ribbon(BloomHash(FilterData()), 3007790572U);
EXPECT_EQ_FastBloom("16,126,133,422,466,472,813,1002,1035", FirstFPs(9));
EXPECT_EQ_Ribbon("33,152,383,497,589,633,737,781,911,990", FirstFPs(10));
ResetPolicy(10);
for (int key = 1; key < 2088; key++) {
Add(Key(key, buffer));
}
Build();
EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 6);
EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 184);
EXPECT_EQ_LegacyBloom(
BloomHash(FilterData()),
SelectByCacheLineSize(2885052954U, 769447944, 4175124908U));
EXPECT_EQ_FastBloom(BloomHash(FilterData()), 23699164U);
EXPECT_EQ_Ribbon(BloomHash(FilterData()), 1942323379U);
EXPECT_EQ_FastBloom("16,126,133,422,466,472,813,1002,1035", FirstFPs(9));
EXPECT_EQ_Ribbon("33,95,360,589,737,911,990,1048,1081,1414", FirstFPs(10));
ResetPolicy(9.5);
for (int key = 1; key < 2088; key++) {
Add(Key(key, buffer));
}
Build();
EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 6);
EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 184);
EXPECT_EQ_LegacyBloom(
BloomHash(FilterData()),
SelectByCacheLineSize(2885052954U, 769447944, 4175124908U));
EXPECT_EQ_FastBloom(BloomHash(FilterData()), 3166884174U);
EXPECT_EQ_Ribbon(BloomHash(FilterData()), 1148258663U);
EXPECT_EQ_FastBloom("126,156,367,444,458,791,813,976,1015", FirstFPs(9));
EXPECT_EQ_Ribbon("33,54,95,360,589,693,737,911,990,1048", FirstFPs(10));
ResetPolicy(10.499);
for (int key = 1; key < 2088; key++) {
Add(Key(key, buffer));
}
Build();
EXPECT_EQ_LegacyBloom(GetNumProbesFromFilterData(), 6);
EXPECT_EQ_FastBloom(GetNumProbesFromFilterData(), 7);
EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 184);
EXPECT_EQ_LegacyBloom(
BloomHash(FilterData()),
SelectByCacheLineSize(2885052954U, 769447944, 4175124908U));
EXPECT_EQ_FastBloom(BloomHash(FilterData()), 4098502778U);
EXPECT_EQ_Ribbon(BloomHash(FilterData()), 792138188U);
EXPECT_EQ_FastBloom("16,236,240,472,1015,1045,1111,1409,1465", FirstFPs(9));
EXPECT_EQ_Ribbon("33,95,360,589,737,990,1048,1081,1414,1643", FirstFPs(10));
ResetPolicy();
}
struct RawFilterTester {
std::array<char, 3000> data_{};
char* metadata_ptr_;
RawFilterTester() : metadata_ptr_(&*(data_.end() - 5)) {}
Slice ResetNoFill(uint32_t len_without_metadata, uint32_t num_lines,
uint32_t num_probes) {
metadata_ptr_[0] = static_cast<char>(num_probes);
EncodeFixed32(metadata_ptr_ + 1, num_lines);
uint32_t len = len_without_metadata + 5;
assert(len <= data_.size());
return Slice(metadata_ptr_ - len_without_metadata, len);
}
Slice Reset(uint32_t len_without_metadata, uint32_t num_lines,
uint32_t num_probes, bool fill_ones) {
data_.fill(fill_ones ? 0xff : 0);
return ResetNoFill(len_without_metadata, num_lines, num_probes);
}
Slice ResetWeirdFill(uint32_t len_without_metadata, uint32_t num_lines,
uint32_t num_probes) {
for (uint32_t i = 0; i < data_.size(); ++i) {
data_[i] = static_cast<char>(0x7b7b >> (i % 7));
}
return ResetNoFill(len_without_metadata, num_lines, num_probes);
}
};
TEST_P(FullBloomTest, RawSchema) {
RawFilterTester cft;
OpenRaw(cft.ResetWeirdFill(256, 1, 2));
EXPECT_EQ(uint64_t{11384799501900898790U}, PackedMatches());
OpenRaw(cft.ResetWeirdFill(256, 2, 2));
EXPECT_EQ(uint64_t{10157853359773492589U}, PackedMatches());
OpenRaw(cft.ResetWeirdFill(256, 4, 2));
EXPECT_EQ(uint64_t{7123594913907464682U}, PackedMatches());
OpenRaw(cft.ResetWeirdFill(256, 2U << 8, 255));
EXPECT_EQ(uint64_t{9957045189927952471U}, PackedMatches());
OpenRaw(cft.ResetWeirdFill(32, 2U << 8, 254));
EXPECT_EQ(uint64_t{6193930559317665002U}, PackedMatches());
OpenRaw(cft.ResetWeirdFill(112, 2U << 8, 254));
EXPECT_EQ(uint64_t{9007200345328128U}, PackedMatches());
}
TEST_P(FullBloomTest, CorruptFilters) {
RawFilterTester cft;
for (bool fill : {false, true}) {
OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 6, fill));
ASSERT_EQ(fill, Matches("hello"));
ASSERT_EQ(fill, Matches("world"));
OpenRaw(cft.Reset(CACHE_LINE_SIZE * 3, 3, 6, fill));
ASSERT_EQ(fill, Matches("hello"));
ASSERT_EQ(fill, Matches("world"));
OpenRaw(cft.Reset(256 * 3, 3, 6, fill));
ASSERT_EQ(fill, Matches("hello"));
ASSERT_EQ(fill, Matches("world"));
OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 30, fill));
ASSERT_EQ(fill, Matches("hello"));
ASSERT_EQ(fill, Matches("world"));
OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 1, fill));
ASSERT_EQ(fill, Matches("hello"));
ASSERT_EQ(fill, Matches("world"));
OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 0, fill));
ASSERT_TRUE(Matches("hello"));
ASSERT_TRUE(Matches("world"));
OpenRaw(cft.Reset(0, 0, 6, fill));
ASSERT_FALSE(Matches("hello"));
ASSERT_FALSE(Matches("world"));
OpenRaw(cft.Reset(0, 37, 6, fill));
ASSERT_FALSE(Matches("hello"));
ASSERT_FALSE(Matches("world"));
OpenRaw(cft.Reset(0, 0, 0, fill));
ASSERT_FALSE(Matches("hello"));
ASSERT_FALSE(Matches("world"));
OpenRaw(cft.Reset(CACHE_LINE_SIZE, 0, 6, fill));
ASSERT_TRUE(Matches("hello"));
ASSERT_TRUE(Matches("world"));
OpenRaw(cft.Reset(4, 3, 6, fill));
ASSERT_TRUE(Matches("hello"));
ASSERT_TRUE(Matches("world"));
OpenRaw(cft.Reset(97 * 3, 3, 6, fill));
ASSERT_TRUE(Matches("hello"));
ASSERT_TRUE(Matches("world"));
OpenRaw(cft.Reset(65 * 3, 3, 6, fill));
ASSERT_TRUE(Matches("hello"));
ASSERT_TRUE(Matches("world"));
OpenRaw(cft.Reset(static_cast<uint32_t>(-1), 3, 6, fill));
ASSERT_FALSE(Matches("hello"));
ASSERT_FALSE(Matches("world"));
OpenRaw(cft.Reset(static_cast<uint32_t>(-5), 3, 6, fill));
ASSERT_FALSE(Matches("hello"));
ASSERT_FALSE(Matches("world"));
OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 31, fill));
ASSERT_EQ(fill, Matches("hello"));
ASSERT_EQ(fill, Matches("world"));
OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 127, fill));
ASSERT_EQ(fill, Matches("hello"));
ASSERT_EQ(fill, Matches("world"));
OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 128, fill));
ASSERT_TRUE(Matches("hello"));
ASSERT_TRUE(Matches("world"));
OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 253, fill));
ASSERT_TRUE(Matches("hello"));
ASSERT_TRUE(Matches("world"));
OpenRaw(cft.Reset(CACHE_LINE_SIZE, 6U << 8, 255, fill));
ASSERT_EQ(fill, Matches("hello"));
ASSERT_EQ(fill, Matches("world"));
OpenRaw(cft.Reset(CACHE_LINE_SIZE, (6U << 8) | 1U, 255, fill));
ASSERT_TRUE(Matches("hello"));
ASSERT_TRUE(Matches("world"));
OpenRaw(cft.Reset(CACHE_LINE_SIZE, (6U << 8) | (1U << 16), 255, fill));
ASSERT_TRUE(Matches("hello"));
ASSERT_TRUE(Matches("world"));
OpenRaw(cft.Reset(CACHE_LINE_SIZE, (6U << 8) | (1U << 24), 255, fill));
ASSERT_TRUE(Matches("hello"));
ASSERT_TRUE(Matches("world"));
OpenRaw(cft.Reset(CACHE_LINE_SIZE, 30U << 8, 255, fill));
ASSERT_EQ(fill, Matches("hello"));
ASSERT_EQ(fill, Matches("world"));
OpenRaw(cft.Reset(CACHE_LINE_SIZE, 31U << 8, 255, fill));
ASSERT_TRUE(Matches("hello"));
ASSERT_TRUE(Matches("world"));
OpenRaw(cft.Reset(CACHE_LINE_SIZE, 33U << 8, 255, fill));
ASSERT_TRUE(Matches("hello"));
ASSERT_TRUE(Matches("world"));
OpenRaw(cft.Reset(CACHE_LINE_SIZE, 66U << 8, 255, fill));
ASSERT_TRUE(Matches("hello"));
ASSERT_TRUE(Matches("world"));
OpenRaw(cft.Reset(CACHE_LINE_SIZE, 130U << 8, 255, fill));
ASSERT_TRUE(Matches("hello"));
ASSERT_TRUE(Matches("world"));
}
OpenRaw(cft.Reset(128, (2U << 8) + 123U, 254, false));
ASSERT_FALSE(Matches("hello"));
ASSERT_FALSE(Matches("world"));
OpenRaw(cft.Reset(256, (2U << 8) + 123U, 254, false));
ASSERT_FALSE(Matches("hello"));
ASSERT_FALSE(Matches("world"));
OpenRaw(cft.Reset(128, (5000U << 8) + 123U, 254, false));
ASSERT_TRUE(Matches("hello"));
ASSERT_TRUE(Matches("world"));
for (int i = 0; Matches(std::to_string(i)); ++i) {
ASSERT_LT(i, 1000);
}
OpenRaw(cft.Reset(128, (1U << 8) + 123U, 254, false));
ASSERT_TRUE(Matches("hello"));
ASSERT_TRUE(Matches("world"));
OpenRaw(cft.Reset(128, (0U << 8) + 123U, 254, false));
ASSERT_TRUE(Matches("hello"));
ASSERT_TRUE(Matches("world"));
}
INSTANTIATE_TEST_CASE_P(Full, FullBloomTest,
testing::Values(kLegacyBloom, kFastLocalBloom,
kStandard128Ribbon));
static double GetEffectiveBitsPerKey(FilterBitsBuilder* builder) {
union {
uint64_t key_value = 0;
char key_bytes[8];
};
const unsigned kNumKeys = 1000;
Slice key_slice{key_bytes, 8};
for (key_value = 0; key_value < kNumKeys; ++key_value) {
builder->AddKey(key_slice);
}
std::unique_ptr<const char[]> buf;
auto filter = builder->Finish(&buf);
return filter.size() * 8 / (1.0 * kNumKeys);
}
static void SetTestingLevel(int levelish, FilterBuildingContext* ctx) {
if (levelish == -1) {
ctx->level_at_creation = 0;
ctx->reason = TableFileCreationReason::kFlush;
} else {
ctx->level_at_creation = levelish;
ctx->reason = TableFileCreationReason::kCompaction;
}
}
TEST(RibbonTest, RibbonTestLevelThreshold) {
BlockBasedTableOptions opts;
FilterBuildingContext ctx(opts);
std::shared_ptr<FilterPolicy> reused{NewRibbonFilterPolicy(10)};
for (CompactionStyle cs : {kCompactionStyleLevel, kCompactionStyleUniversal,
kCompactionStyleFIFO, kCompactionStyleNone}) {
ctx.compaction_style = cs;
for (int bloom_before_level : {-1, 0, 1, 10, INT_MAX - 1, INT_MAX}) {
SCOPED_TRACE("bloom_before_level=" + std::to_string(bloom_before_level));
std::vector<std::shared_ptr<FilterPolicy> > policies;
policies.emplace_back(NewRibbonFilterPolicy(10, bloom_before_level));
if (bloom_before_level == 0) {
policies.emplace_back(NewRibbonFilterPolicy(10));
}
ASSERT_OK(reused->ConfigureOption({}, "bloom_before_level",
std::to_string(bloom_before_level)));
policies.push_back(reused);
for (auto& policy : policies) {
std::unique_ptr<FilterBitsBuilder> builder;
if (bloom_before_level < INT_MAX) {
SetTestingLevel(bloom_before_level, &ctx);
builder.reset(policy->GetBuilderWithContext(ctx));
ASSERT_LT(GetEffectiveBitsPerKey(builder.get()), 8);
}
if (bloom_before_level >= 0) {
SetTestingLevel(bloom_before_level - 1, &ctx);
builder.reset(policy->GetBuilderWithContext(ctx));
if (cs == kCompactionStyleLevel || cs == kCompactionStyleUniversal) {
ASSERT_GT(GetEffectiveBitsPerKey(builder.get()), 9);
} else if (bloom_before_level == INT_MAX) {
ASSERT_GT(GetEffectiveBitsPerKey(builder.get()), 9);
} else {
ASSERT_LT(GetEffectiveBitsPerKey(builder.get()), 8);
}
}
ctx.level_at_creation = -1;
ctx.reason = TableFileCreationReason::kMisc;
builder.reset(policy->GetBuilderWithContext(ctx));
if (bloom_before_level < INT_MAX) {
ASSERT_LT(GetEffectiveBitsPerKey(builder.get()), 8);
} else {
ASSERT_GT(GetEffectiveBitsPerKey(builder.get()), 9);
}
}
}
}
}
}
int main(int argc, char** argv) {
ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
::testing::InitGoogleTest(&argc, argv);
ParseCommandLineFlags(&argc, &argv, true);
return RUN_ALL_TESTS();
}
#endif