#include "table/block_based/block.h"
#include <algorithm>
#include <cstdio>
#include <set>
#include <string>
#include <unordered_set>
#include <utility>
#include <vector>
#include "db/db_test_util.h"
#include "db/dbformat.h"
#include "db/memtable.h"
#include "db/write_batch_internal.h"
#include "rocksdb/db.h"
#include "rocksdb/env.h"
#include "rocksdb/iterator.h"
#include "rocksdb/slice_transform.h"
#include "rocksdb/table.h"
#include "table/block_based/block_based_table_reader.h"
#include "table/block_based/block_builder.h"
#include "table/block_based/data_block_footer.h"
#include "table/format.h"
#include "test_util/testharness.h"
#include "test_util/testutil.h"
#include "util/random.h"
namespace ROCKSDB_NAMESPACE {
std::string GenerateInternalKey(int primary_key, int secondary_key,
int padding_size, Random* rnd,
size_t ts_sz = 0) {
char buf[50];
char* p = &buf[0];
snprintf(buf, sizeof(buf), "%6d%4d", primary_key, secondary_key);
std::string k(p);
if (padding_size) {
k += rnd->RandomString(padding_size);
}
AppendInternalKeyFooter(&k, 0 , kTypeValue);
std::string key_with_ts;
if (ts_sz > 0) {
PadInternalKeyWithMinTimestamp(&key_with_ts, k, ts_sz);
return key_with_ts;
}
return k;
}
void GenerateRandomKVs(std::vector<std::string>* keys,
std::vector<std::string>* values, const int from,
const int len, const int step = 1,
const int padding_size = 0,
const int keys_share_prefix = 1, size_t ts_sz = 0) {
Random rnd(302);
for (int i = from; i < from + len; i += step) {
for (int j = 0; j < keys_share_prefix; ++j) {
keys->emplace_back(GenerateInternalKey(i, j, padding_size, &rnd, ts_sz));
values->emplace_back(rnd.RandomString(100));
}
}
}
class BlockTest
: public testing::Test,
public testing::WithParamInterface<std::tuple<
bool, test::UserDefinedTimestampTestMode,
BlockBasedTableOptions::DataBlockIndexType, uint32_t, bool>> {
public:
bool keyUseDeltaEncoding() const { return std::get<0>(GetParam()); }
bool isUDTEnabled() const {
return test::IsUDTEnabled(std::get<1>(GetParam()));
}
bool shouldPersistUDT() const {
return test::ShouldPersistUDT(std::get<1>(GetParam()));
}
BlockBasedTableOptions::DataBlockIndexType dataBlockIndexType() const {
return std::get<2>(GetParam());
}
uint32_t getRestartInterval() const { return std::get<3>(GetParam()); }
bool useSeparatedKVStorage() const { return std::get<4>(GetParam()); }
};
TEST_P(BlockTest, SimpleTest) {
Random rnd(301);
Options options = Options();
if (isUDTEnabled()) {
options.comparator = test::BytewiseComparatorWithU64TsWrapper();
}
size_t ts_sz = options.comparator->timestamp_size();
std::vector<std::string> keys;
std::vector<std::string> values;
BlockBasedTableOptions::DataBlockIndexType index_type =
isUDTEnabled() ? BlockBasedTableOptions::kDataBlockBinarySearch
: dataBlockIndexType();
BlockBuilder builder(
static_cast<int>(getRestartInterval()), keyUseDeltaEncoding(),
false , index_type,
0.75 , ts_sz, shouldPersistUDT(),
false , useSeparatedKVStorage());
int num_records = 20;
GenerateRandomKVs(&keys, &values, 0, num_records, 1 ,
0 , 1 , ts_sz);
for (int i = 0; i < num_records; i++) {
builder.Add(keys[i], values[i]);
}
Slice rawblock = builder.Finish();
BlockContents contents;
contents.data = rawblock;
Block reader(std::move(contents), 0 ,
nullptr , getRestartInterval());
int count = 0;
InternalIterator* iter = reader.NewDataIterator(
options.comparator, kDisableGlobalSequenceNumber, nullptr ,
nullptr , false ,
shouldPersistUDT());
for (iter->SeekToFirst(); iter->Valid(); count++, iter->Next()) {
Slice k = iter->key();
Slice v = iter->value();
ASSERT_EQ(k.ToString().compare(keys[count]), 0);
ASSERT_EQ(v.ToString().compare(values[count]), 0);
}
delete iter;
iter = reader.NewDataIterator(
options.comparator, kDisableGlobalSequenceNumber, nullptr ,
nullptr , false ,
shouldPersistUDT());
for (int i = 0; i < num_records; i++) {
int index = rnd.Uniform(num_records);
Slice k(keys[index]);
iter->Seek(k);
ASSERT_TRUE(iter->Valid());
Slice v = iter->value();
ASSERT_EQ(v.ToString().compare(values[index]), 0);
}
delete iter;
}
BlockContents GetBlockContents(
std::unique_ptr<BlockBuilder>* builder,
const std::vector<std::string>& keys,
const std::vector<std::string>& values, bool key_use_delta_encoding,
size_t ts_sz, bool should_persist_udt, const int = 1,
BlockBasedTableOptions::DataBlockIndexType dblock_index_type =
BlockBasedTableOptions::DataBlockIndexType::kDataBlockBinarySearch,
bool use_separated_kv_storage = false, uint32_t restart_interval = 1) {
builder->reset(new BlockBuilder(
static_cast<int>(restart_interval), key_use_delta_encoding,
false , dblock_index_type,
0.75 , ts_sz, should_persist_udt,
false , use_separated_kv_storage));
for (size_t i = 0; i < keys.size(); ++i) {
(*builder)->Add(keys[i], values[i]);
}
Slice rawblock = (*builder)->Finish();
BlockContents contents;
contents.data = rawblock;
return contents;
}
void CheckBlockContents(BlockContents contents, const int max_key,
const std::vector<std::string>& keys,
const std::vector<std::string>& values,
bool is_udt_enabled, bool should_persist_udt,
uint32_t restart_interval) {
const size_t prefix_size = 6;
BlockContents contents_ref(contents.data);
Block reader1(std::move(contents), 0 ,
nullptr , restart_interval);
Block reader2(std::move(contents_ref), 0 ,
nullptr , restart_interval);
std::unique_ptr<const SliceTransform> prefix_extractor(
NewFixedPrefixTransform(prefix_size));
std::unique_ptr<InternalIterator> regular_iter(reader2.NewDataIterator(
is_udt_enabled ? test::BytewiseComparatorWithU64TsWrapper()
: BytewiseComparator(),
kDisableGlobalSequenceNumber, nullptr , nullptr ,
false , should_persist_udt));
for (size_t i = 0; i < keys.size(); i++) {
regular_iter->Seek(keys[i]);
ASSERT_OK(regular_iter->status());
ASSERT_TRUE(regular_iter->Valid());
Slice v = regular_iter->value();
ASSERT_EQ(v.ToString().compare(values[i]), 0);
}
for (int i = 1; i < max_key - 1; i += 2) {
auto key = GenerateInternalKey(i, 0, 0, nullptr,
is_udt_enabled ? 8 : 0 );
regular_iter->Seek(key);
ASSERT_TRUE(regular_iter->Valid());
}
}
TEST_P(BlockTest, SimpleIndexHash) {
const int kMaxKey = 100000;
size_t ts_sz = isUDTEnabled() ? 8 : 0;
std::vector<std::string> keys;
std::vector<std::string> values;
GenerateRandomKVs(&keys, &values, 0 ,
kMaxKey , 2 ,
8 ,
1 , ts_sz);
std::unique_ptr<BlockBuilder> builder;
auto contents = GetBlockContents(
&builder, keys, values, keyUseDeltaEncoding(), ts_sz, shouldPersistUDT(),
1 ,
isUDTEnabled()
? BlockBasedTableOptions::DataBlockIndexType::kDataBlockBinarySearch
: dataBlockIndexType(),
useSeparatedKVStorage(), getRestartInterval());
CheckBlockContents(std::move(contents), kMaxKey, keys, values, isUDTEnabled(),
shouldPersistUDT(), getRestartInterval());
}
TEST_P(BlockTest, IndexHashWithSharedPrefix) {
const int kMaxKey = 100000;
const int kPrefixGroup = 5;
size_t ts_sz = isUDTEnabled() ? 8 : 0;
std::vector<std::string> keys;
std::vector<std::string> values;
GenerateRandomKVs(&keys, &values, 0, kMaxKey, 2 ,
10 ,
kPrefixGroup , ts_sz);
std::unique_ptr<BlockBuilder> builder;
auto contents = GetBlockContents(
&builder, keys, values, keyUseDeltaEncoding(), ts_sz, shouldPersistUDT(),
kPrefixGroup,
isUDTEnabled()
? BlockBasedTableOptions::DataBlockIndexType::kDataBlockBinarySearch
: dataBlockIndexType(),
useSeparatedKVStorage(), getRestartInterval());
CheckBlockContents(std::move(contents), kMaxKey, keys, values, isUDTEnabled(),
shouldPersistUDT(), getRestartInterval());
}
INSTANTIATE_TEST_CASE_P(
P, BlockTest,
::testing::Combine(
::testing::Bool(), ::testing::ValuesIn(test::GetUDTTestModes()),
::testing::Values(
BlockBasedTableOptions::DataBlockIndexType::kDataBlockBinarySearch,
BlockBasedTableOptions::DataBlockIndexType::
kDataBlockBinaryAndHash),
::testing::Values(1, 8, 16), ::testing::Bool()));
class BlockReadAmpBitmapSlowAndAccurate {
public:
void Mark(size_t start_offset, size_t end_offset) {
assert(end_offset >= start_offset);
marked_ranges_.emplace(end_offset, start_offset);
}
void ResetCheckSequence() { iter_valid_ = false; }
bool IsPinMarked(size_t offset) {
if (iter_valid_) {
for (int i = 0; i < 64; i++) {
if (offset < iter_->second) {
return false;
}
if (offset <= iter_->first) {
return true;
}
iter_++;
if (iter_ == marked_ranges_.end()) {
iter_valid_ = false;
return false;
}
}
}
iter_ = marked_ranges_.lower_bound(
std::make_pair(offset, static_cast<size_t>(0)));
if (iter_ == marked_ranges_.end()) {
iter_valid_ = false;
return false;
}
iter_valid_ = true;
return offset <= iter_->first && offset >= iter_->second;
}
private:
std::set<std::pair<size_t, size_t>> marked_ranges_;
std::set<std::pair<size_t, size_t>>::iterator iter_;
bool iter_valid_ = false;
};
TEST_F(BlockTest, BlockReadAmpBitmap) {
uint32_t pin_offset = 0;
SyncPoint::GetInstance()->SetCallBack(
"BlockReadAmpBitmap:rnd", [&pin_offset](void* arg) {
pin_offset = *(static_cast<uint32_t*>(arg));
});
SyncPoint::GetInstance()->EnableProcessing();
std::vector<size_t> block_sizes = {
1, 32, 61, 64, 512, 1024, 1024 * 4, 1024 * 10, 1024 * 50, 1024 * 1024 * 4, 777,
124653,
};
const size_t kBytesPerBit = 64;
Random rnd(301);
for (size_t block_size : block_sizes) {
std::shared_ptr<Statistics> stats = ROCKSDB_NAMESPACE::CreateDBStatistics();
BlockReadAmpBitmap read_amp_bitmap(block_size, kBytesPerBit, stats.get());
BlockReadAmpBitmapSlowAndAccurate read_amp_slow_and_accurate;
size_t needed_bits = (block_size / kBytesPerBit);
if (block_size % kBytesPerBit != 0) {
needed_bits++;
}
ASSERT_EQ(stats->getTickerCount(READ_AMP_TOTAL_READ_BYTES), block_size);
std::vector<size_t> random_entry_offsets;
for (int i = 0; i < 1000; i++) {
random_entry_offsets.push_back(rnd.Next() % block_size);
}
std::sort(random_entry_offsets.begin(), random_entry_offsets.end());
auto it =
std::unique(random_entry_offsets.begin(), random_entry_offsets.end());
random_entry_offsets.resize(
std::distance(random_entry_offsets.begin(), it));
std::vector<std::pair<size_t, size_t>> random_entries;
for (size_t i = 0; i < random_entry_offsets.size(); i++) {
size_t entry_start = random_entry_offsets[i];
size_t entry_end;
if (i + 1 < random_entry_offsets.size()) {
entry_end = random_entry_offsets[i + 1] - 1;
} else {
entry_end = block_size - 1;
}
random_entries.emplace_back(entry_start, entry_end);
}
for (size_t i = 0; i < random_entries.size(); i++) {
read_amp_slow_and_accurate.ResetCheckSequence();
auto& current_entry = random_entries[rnd.Next() % random_entries.size()];
read_amp_bitmap.Mark(static_cast<uint32_t>(current_entry.first),
static_cast<uint32_t>(current_entry.second));
read_amp_slow_and_accurate.Mark(current_entry.first,
current_entry.second);
size_t total_bits = 0;
for (size_t bit_idx = 0; bit_idx < needed_bits; bit_idx++) {
total_bits += read_amp_slow_and_accurate.IsPinMarked(
bit_idx * kBytesPerBit + pin_offset);
}
size_t expected_estimate_useful = total_bits * kBytesPerBit;
size_t got_estimate_useful =
stats->getTickerCount(READ_AMP_ESTIMATE_USEFUL_BYTES);
ASSERT_EQ(expected_estimate_useful, got_estimate_useful);
}
}
SyncPoint::GetInstance()->DisableProcessing();
SyncPoint::GetInstance()->ClearAllCallBacks();
}
TEST_F(BlockTest, BlockWithReadAmpBitmap) {
Random rnd(301);
Options options = Options();
std::vector<std::string> keys;
std::vector<std::string> values;
BlockBuilder builder(16);
int num_records = 10000;
GenerateRandomKVs(&keys, &values, 0, num_records, 1 );
for (int i = 0; i < num_records; i++) {
builder.Add(keys[i], values[i]);
}
Slice rawblock = builder.Finish();
const size_t kBytesPerBit = 8;
{
std::shared_ptr<Statistics> stats = ROCKSDB_NAMESPACE::CreateDBStatistics();
BlockContents contents;
contents.data = rawblock;
Block reader(std::move(contents), kBytesPerBit, stats.get());
size_t read_bytes = 0;
DataBlockIter* iter = reader.NewDataIterator(
options.comparator, kDisableGlobalSequenceNumber, nullptr, stats.get());
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
iter->value();
read_bytes += iter->TEST_CurrentEntrySize();
double semi_acc_read_amp =
static_cast<double>(read_bytes) / rawblock.size();
double read_amp = static_cast<double>(stats->getTickerCount(
READ_AMP_ESTIMATE_USEFUL_BYTES)) /
stats->getTickerCount(READ_AMP_TOTAL_READ_BYTES);
double error_pct = fabs(semi_acc_read_amp - read_amp) * 100;
EXPECT_LT(error_pct, 1);
}
delete iter;
}
{
std::shared_ptr<Statistics> stats = ROCKSDB_NAMESPACE::CreateDBStatistics();
BlockContents contents;
contents.data = rawblock;
Block reader(std::move(contents), kBytesPerBit, stats.get());
size_t read_bytes = 0;
DataBlockIter* iter = reader.NewDataIterator(
options.comparator, kDisableGlobalSequenceNumber, nullptr, stats.get());
for (int i = 0; i < num_records; i++) {
Slice k(keys[i]);
iter->Seek(k);
iter->value();
read_bytes += iter->TEST_CurrentEntrySize();
double semi_acc_read_amp =
static_cast<double>(read_bytes) / rawblock.size();
double read_amp = static_cast<double>(stats->getTickerCount(
READ_AMP_ESTIMATE_USEFUL_BYTES)) /
stats->getTickerCount(READ_AMP_TOTAL_READ_BYTES);
double error_pct = fabs(semi_acc_read_amp - read_amp) * 100;
EXPECT_LT(error_pct, 1);
}
delete iter;
}
{
std::shared_ptr<Statistics> stats = ROCKSDB_NAMESPACE::CreateDBStatistics();
BlockContents contents;
contents.data = rawblock;
Block reader(std::move(contents), kBytesPerBit, stats.get());
size_t read_bytes = 0;
DataBlockIter* iter = reader.NewDataIterator(
options.comparator, kDisableGlobalSequenceNumber, nullptr, stats.get());
std::unordered_set<int> read_keys;
for (int i = 0; i < num_records; i++) {
int index = rnd.Uniform(num_records);
Slice k(keys[index]);
iter->Seek(k);
iter->value();
if (read_keys.find(index) == read_keys.end()) {
read_keys.insert(index);
read_bytes += iter->TEST_CurrentEntrySize();
}
double semi_acc_read_amp =
static_cast<double>(read_bytes) / rawblock.size();
double read_amp = static_cast<double>(stats->getTickerCount(
READ_AMP_ESTIMATE_USEFUL_BYTES)) /
stats->getTickerCount(READ_AMP_TOTAL_READ_BYTES);
double error_pct = fabs(semi_acc_read_amp - read_amp) * 100;
EXPECT_LT(error_pct, 2);
}
delete iter;
}
}
TEST_F(BlockTest, ReadAmpBitmapPow2) {
std::shared_ptr<Statistics> stats = ROCKSDB_NAMESPACE::CreateDBStatistics();
ASSERT_EQ(BlockReadAmpBitmap(100, 1, stats.get()).GetBytesPerBit(), 1u);
ASSERT_EQ(BlockReadAmpBitmap(100, 2, stats.get()).GetBytesPerBit(), 2u);
ASSERT_EQ(BlockReadAmpBitmap(100, 4, stats.get()).GetBytesPerBit(), 4u);
ASSERT_EQ(BlockReadAmpBitmap(100, 8, stats.get()).GetBytesPerBit(), 8u);
ASSERT_EQ(BlockReadAmpBitmap(100, 16, stats.get()).GetBytesPerBit(), 16u);
ASSERT_EQ(BlockReadAmpBitmap(100, 32, stats.get()).GetBytesPerBit(), 32u);
ASSERT_EQ(BlockReadAmpBitmap(100, 3, stats.get()).GetBytesPerBit(), 2u);
ASSERT_EQ(BlockReadAmpBitmap(100, 7, stats.get()).GetBytesPerBit(), 4u);
ASSERT_EQ(BlockReadAmpBitmap(100, 11, stats.get()).GetBytesPerBit(), 8u);
ASSERT_EQ(BlockReadAmpBitmap(100, 17, stats.get()).GetBytesPerBit(), 16u);
ASSERT_EQ(BlockReadAmpBitmap(100, 33, stats.get()).GetBytesPerBit(), 32u);
ASSERT_EQ(BlockReadAmpBitmap(100, 35, stats.get()).GetBytesPerBit(), 32u);
}
void AddIndexBlockEntry(BlockBuilder& builder, const Slice& key,
const BlockHandle& bh, const BlockHandle* prev,
bool include_first_key,
const Slice& first_internal_key = Slice()) {
IndexValue entry(bh, first_internal_key);
std::string encoded_entry;
entry.EncodeTo(&encoded_entry, include_first_key, nullptr);
std::string delta_encoded_entry;
if (prev) {
entry.EncodeTo(&delta_encoded_entry, include_first_key, prev);
}
const Slice delta_slice(delta_encoded_entry);
builder.Add(key, encoded_entry, &delta_slice);
}
enum class KeyDistribution { kUniform, kNonUniform };
class IndexBlockTest
: public testing::Test,
public testing::WithParamInterface<
std::tuple<bool, bool, bool, bool, test::UserDefinedTimestampTestMode,
BlockBasedTableOptions::BlockSearchType, int, int, int,
std::pair<int, KeyDistribution>>> {
public:
IndexBlockTest() = default;
bool keyIncludesSeq() const { return std::get<0>(GetParam()); }
bool useValueDeltaEncoding() const { return std::get<1>(GetParam()); }
bool includeFirstKey() const { return std::get<2>(GetParam()); }
bool useSeparatedKVStorage() const { return std::get<3>(GetParam()); }
bool isUDTEnabled() const {
return test::IsUDTEnabled(std::get<4>(GetParam()));
}
bool shouldPersistUDT() const {
return test::ShouldPersistUDT(std::get<4>(GetParam()));
}
BlockBasedTableOptions::BlockSearchType indexSearchType() const {
return isUDTEnabled() ? BlockBasedTableOptions::kBinary
: std::get<5>(GetParam());
}
int numRecords() const {
return std::min(1 << keyLength(), std::get<6>(GetParam()));
}
int indexBlockRestartInterval() const { return std::get<7>(GetParam()); }
int keyLength() const { return std::get<8>(GetParam()); }
int prefixLength() const { return std::get<9>(GetParam()).first; }
KeyDistribution keyDistribution() const {
return std::get<9>(GetParam()).second;
}
};
void GenerateRandomIndexEntries(
std::vector<std::string>* separators,
std::vector<BlockHandle>* block_handles,
std::vector<std::string>* first_keys, const int len, size_t ts_sz = 0,
int key_length = 12, int prefix_length = 0,
KeyDistribution distribution = KeyDistribution::kUniform) {
Random rnd(42);
std::string prefix(prefix_length, 'x');
std::set<std::string> keys;
int cluster_prefix_len = std::max(0, key_length - 5);
std::string cluster1_prefix = prefix + rnd.RandomString(cluster_prefix_len);
std::string cluster2_prefix = prefix + rnd.RandomString(cluster_prefix_len);
while ((int)keys.size() < len * 2) {
std::string new_key;
if (distribution == KeyDistribution::kNonUniform) {
int remaining = key_length - cluster_prefix_len;
const std::string& cp =
(keys.size() % 2 == 0) ? cluster1_prefix : cluster2_prefix;
new_key = cp + rnd.RandomString(std::max(1, remaining));
} else {
new_key = prefix + test::RandomKey(&rnd, key_length);
}
AppendInternalKeyFooter(&new_key, 0 , kTypeValue);
if (ts_sz > 0) {
std::string key;
PadInternalKeyWithMinTimestamp(&key, new_key, ts_sz);
keys.insert(std::move(key));
} else {
keys.insert(std::move(new_key));
}
}
uint64_t offset = 0;
for (auto it = keys.begin(); it != keys.end();) {
first_keys->emplace_back(*it++);
separators->emplace_back(*it++);
uint64_t size = rnd.Uniform(1024 * 16);
BlockHandle handle(offset, size);
offset += size + BlockBasedTable::kBlockTrailerSize;
block_handles->emplace_back(handle);
}
}
TEST_P(IndexBlockTest, IndexValueEncodingTest) {
Random rnd(301);
Options options = Options();
if (isUDTEnabled()) {
options.comparator = test::BytewiseComparatorWithU64TsWrapper();
}
size_t ts_sz = options.comparator->timestamp_size();
std::vector<std::string> separators;
std::vector<BlockHandle> block_handles;
std::vector<std::string> first_keys;
const bool kUseDeltaEncoding = true;
BlockBuilder builder(
indexBlockRestartInterval(), kUseDeltaEncoding, useValueDeltaEncoding(),
BlockBasedTableOptions::kDataBlockBinarySearch,
0.75 , ts_sz, shouldPersistUDT(),
!keyIncludesSeq(), useSeparatedKVStorage());
int num_records = numRecords();
GenerateRandomIndexEntries(&separators, &block_handles, &first_keys,
num_records, ts_sz, keyLength(), prefixLength(),
keyDistribution());
BlockHandle last_encoded_handle;
for (int i = 0; i < num_records; i++) {
std::string first_key_to_persist_buf;
Slice first_internal_key = first_keys[i];
if (ts_sz > 0 && !shouldPersistUDT()) {
StripTimestampFromInternalKey(&first_key_to_persist_buf, first_keys[i],
ts_sz);
first_internal_key = first_key_to_persist_buf;
}
const BlockHandle* prev =
(useValueDeltaEncoding() && i > 0) ? &last_encoded_handle : nullptr;
Slice add_key =
keyIncludesSeq() ? Slice(separators[i]) : ExtractUserKey(separators[i]);
AddIndexBlockEntry(builder, add_key, block_handles[i], prev,
includeFirstKey(), first_internal_key);
last_encoded_handle = block_handles[i];
}
Slice rawblock = builder.Finish();
BlockContents contents;
contents.data = rawblock;
Block reader(std::move(contents), 0 ,
nullptr ,
static_cast<uint32_t>(indexBlockRestartInterval()));
const bool kTotalOrderSeek = true;
IndexBlockIter* kNullIter = nullptr;
Statistics* kNullStats = nullptr;
InternalIteratorBase<IndexValue>* iter = reader.NewIndexIterator(
options.comparator, kDisableGlobalSequenceNumber, kNullIter, kNullStats,
kTotalOrderSeek, includeFirstKey(), keyIncludesSeq(),
!useValueDeltaEncoding(), false ,
shouldPersistUDT(), nullptr , indexSearchType());
iter->SeekToFirst();
for (int index = 0; index < num_records; ++index) {
ASSERT_TRUE(iter->Valid());
Slice k = iter->key();
IndexValue v = iter->value();
if (keyIncludesSeq()) {
EXPECT_EQ(separators[index], k.ToString());
} else {
const Slice user_key = ExtractUserKey(separators[index]);
EXPECT_EQ(user_key, k);
}
EXPECT_EQ(block_handles[index].offset(), v.handle.offset());
EXPECT_EQ(block_handles[index].size(), v.handle.size());
EXPECT_EQ(includeFirstKey() ? first_keys[index] : "",
v.first_internal_key.ToString());
iter->Next();
}
delete iter;
iter = reader.NewIndexIterator(
options.comparator, kDisableGlobalSequenceNumber, kNullIter, kNullStats,
kTotalOrderSeek, includeFirstKey(), keyIncludesSeq(),
!useValueDeltaEncoding(), false ,
shouldPersistUDT(), nullptr , indexSearchType());
for (int i = 0; i < num_records * 2; i++) {
int index = rnd.Uniform(num_records);
Slice k(separators[index]);
iter->Seek(k);
ASSERT_TRUE(iter->Valid());
IndexValue v = iter->value();
if (keyIncludesSeq()) {
EXPECT_EQ(separators[index], iter->key().ToString());
} else {
const Slice user_key = ExtractUserKey(separators[index]);
EXPECT_EQ(user_key, iter->key());
}
EXPECT_EQ(block_handles[index].offset(), v.handle.offset());
EXPECT_EQ(block_handles[index].size(), v.handle.size());
EXPECT_EQ(includeFirstKey() ? first_keys[index] : "",
v.first_internal_key.ToString());
}
delete iter;
}
INSTANTIATE_TEST_CASE_P(
P, IndexBlockTest,
::testing::Combine(
::testing::Bool(), ::testing::Bool(), ::testing::Bool(),
::testing::Bool(), ::testing::ValuesIn(test::GetUDTTestModes()),
::testing::Values(
BlockBasedTableOptions::BlockSearchType::kBinary,
BlockBasedTableOptions::BlockSearchType::kInterpolation),
::testing::Values(1, 100), ::testing::Values(1, 16), ::testing::Values(1, 8, 12), ::testing::Values(std::make_pair(0, KeyDistribution::kUniform),
std::make_pair(0, KeyDistribution::kNonUniform),
std::make_pair(50, KeyDistribution::kUniform),
std::make_pair(50, KeyDistribution::kNonUniform))));
TEST(IndexBlockTest, InterpolationSearchPrefixBoundary) {
const bool kIncludeFirstKey = false;
const bool kUseValueDeltaEncoding = true;
const uint64_t kBlockSize = 50;
const std::string kPrefix = "ABCDEFGHIJ";
const int kNumKeys = 20;
std::vector<std::string> keys;
keys.reserve(kNumKeys);
for (int i = 0; i < kNumKeys; i++) {
std::string suffix = std::to_string(i);
char formatted_suffix[4];
snprintf(formatted_suffix, sizeof(formatted_suffix), "%03d", i);
keys.push_back(kPrefix + formatted_suffix);
}
std::vector<BlockHandle> handles;
handles.reserve(kNumKeys);
for (int i = 0; i < kNumKeys; i++) {
handles.emplace_back(i * (kBlockSize + BlockBasedTable::kBlockTrailerSize),
kBlockSize);
}
BlockBuilder builder(
1 , true ,
kUseValueDeltaEncoding, BlockBasedTableOptions::kDataBlockBinarySearch,
0.75 , 0 ,
false , true );
for (int i = 0; i < kNumKeys; i++) {
BlockHandle* prev = i > 0 ? &handles[i - 1] : nullptr;
AddIndexBlockEntry(builder, keys[i], handles[i], prev, kIncludeFirstKey);
}
Slice rawblock = builder.Finish();
BlockContents contents;
contents.data = rawblock;
Block reader(std::move(contents));
auto make_target = [](const std::string& user_key) {
std::string target = user_key;
AppendInternalKeyFooter(&target, kMaxSequenceNumber, kValueTypeForSeek);
return target;
};
std::unique_ptr<InternalIteratorBase<IndexValue>> iter(
reader.NewIndexIterator(
BytewiseComparator(), kDisableGlobalSequenceNumber,
nullptr , nullptr , true ,
kIncludeFirstKey, false ,
!kUseValueDeltaEncoding ,
false ,
true ,
nullptr ,
BlockBasedTableOptions::BlockSearchType::kInterpolation));
iter->Seek(make_target("AAAAAA"));
ASSERT_TRUE(iter->Valid());
EXPECT_EQ(iter->key(), keys[0]);
iter->Seek(make_target(""));
ASSERT_TRUE(iter->Valid());
EXPECT_EQ(iter->key(), keys[0]);
iter->Seek(make_target("ABCDEFGHZZ"));
ASSERT_FALSE(iter->Valid());
iter->Seek(make_target("ABCDEFGHIJ"));
ASSERT_TRUE(iter->Valid());
EXPECT_EQ(iter->key(), keys[0]);
iter->Seek(make_target("ABCDEFG"));
ASSERT_TRUE(iter->Valid());
EXPECT_EQ(iter->key(), keys[0]);
}
TEST(IndexBlockTest, InterpolationSearchPrefixBoundary2) {
const bool kIncludeFirstKey = false;
const bool kUseValueDeltaEncoding = true;
const uint64_t kBlockSize = 50;
const std::string kUserKey = "ABCDEFGHIJ";
const int kNumKeys = 20;
std::vector<std::string> keys;
keys.reserve(kNumKeys);
for (int i = 0; i < kNumKeys; i++) {
std::string ikey = kUserKey;
SequenceNumber seq = static_cast<SequenceNumber>(kNumKeys - i);
AppendInternalKeyFooter(&ikey, seq, kTypeValue);
keys.push_back(ikey);
}
std::vector<BlockHandle> handles;
handles.reserve(kNumKeys);
for (int i = 0; i < kNumKeys; i++) {
handles.emplace_back(i * (kBlockSize + BlockBasedTable::kBlockTrailerSize),
kBlockSize);
}
BlockBuilder builder(
1 , true ,
kUseValueDeltaEncoding, BlockBasedTableOptions::kDataBlockBinarySearch,
0.75 , 0 ,
false , false );
for (int i = 0; i < kNumKeys; i++) {
BlockHandle* prev = i > 0 ? &handles[i - 1] : nullptr;
AddIndexBlockEntry(builder, keys[i], handles[i], prev, kIncludeFirstKey);
}
Slice rawblock = builder.Finish();
BlockContents contents;
contents.data = rawblock;
Block reader(std::move(contents));
auto make_target = [&](const std::string& user_key,
SequenceNumber seq = kMaxSequenceNumber) {
std::string target = user_key;
AppendInternalKeyFooter(&target, seq, kTypeValue);
return target;
};
std::unique_ptr<InternalIteratorBase<IndexValue>> iter(
reader.NewIndexIterator(
BytewiseComparator(), kDisableGlobalSequenceNumber,
nullptr , nullptr , true ,
kIncludeFirstKey, true ,
!kUseValueDeltaEncoding ,
false ,
true ,
nullptr ,
BlockBasedTableOptions::BlockSearchType::kInterpolation));
for (int i = 0; i < kNumKeys; i++) {
SequenceNumber seq = static_cast<SequenceNumber>(kNumKeys - i);
iter->Seek(make_target(kUserKey, seq));
ASSERT_TRUE(iter->Valid());
EXPECT_EQ(iter->key(), keys[i]);
}
iter->Seek(make_target("AAAAAA"));
ASSERT_TRUE(iter->Valid());
EXPECT_EQ(iter->key(), keys[0]);
iter->Seek(make_target(""));
ASSERT_TRUE(iter->Valid());
EXPECT_EQ(iter->key(), keys[0]);
iter->Seek(make_target("ABCDEFGHZZ"));
ASSERT_FALSE(iter->Valid());
iter->Seek(make_target("ABCDEFGHIJ"));
ASSERT_TRUE(iter->Valid());
EXPECT_EQ(iter->key(), keys[0]);
iter->Seek(make_target("ABCDEFG"));
ASSERT_TRUE(iter->Valid());
EXPECT_EQ(iter->key(), keys[0]);
iter->Seek(make_target("ABCDEFGHIJ" + std::string(1, kTypeValue)));
ASSERT_FALSE(iter->Valid());
}
class BlockPerKVChecksumTest : public DBTestBase {
public:
BlockPerKVChecksumTest()
: DBTestBase("block_per_kv_checksum", false) {}
template <typename TBlockIter>
void TestIterateForward(std::unique_ptr<TBlockIter>& biter,
size_t& verification_count) {
while (biter->Valid()) {
verification_count = 0;
biter->Next();
if (biter->Valid()) {
ASSERT_GE(verification_count, 1);
}
}
}
template <typename TBlockIter>
void TestIterateBackward(std::unique_ptr<TBlockIter>& biter,
size_t& verification_count) {
while (biter->Valid()) {
verification_count = 0;
biter->Prev();
if (biter->Valid()) {
ASSERT_GE(verification_count, 1);
}
}
}
template <typename TBlockIter>
void TestSeekToFirst(std::unique_ptr<TBlockIter>& biter,
size_t& verification_count) {
verification_count = 0;
biter->SeekToFirst();
ASSERT_GE(verification_count, 1);
TestIterateForward(biter, verification_count);
}
template <typename TBlockIter>
void TestSeekToLast(std::unique_ptr<TBlockIter>& biter,
size_t& verification_count) {
verification_count = 0;
biter->SeekToLast();
ASSERT_GE(verification_count, 1);
TestIterateBackward(biter, verification_count);
}
template <typename TBlockIter>
void TestSeekForPrev(std::unique_ptr<TBlockIter>& biter,
size_t& verification_count, const std::string& k) {
verification_count = 0;
biter->SeekForPrev(k);
ASSERT_GE(verification_count, 1);
TestIterateBackward(biter, verification_count);
}
template <typename TBlockIter>
void TestSeek(std::unique_ptr<TBlockIter>& biter, size_t& verification_count,
const std::string& k) {
verification_count = 0;
biter->Seek(k);
ASSERT_GE(verification_count, 1);
TestIterateForward(biter, verification_count);
}
bool VerifyChecksum(uint32_t checksum_len, const char* checksum_ptr,
const Slice& key, const Slice& val) {
if (!checksum_len) {
return checksum_ptr == nullptr;
}
return ProtectionInfo64().ProtectKV(key, val).Verify(
static_cast<uint8_t>(checksum_len), checksum_ptr);
}
};
namespace {
const BlockBasedTableOptions* kTableOptions() {
static BlockBasedTableOptions opts{};
return &opts;
}
Decompressor* kDecompressor() {
static auto mgr = GetBuiltinV2CompressionManager();
static auto decomp = mgr->GetDecompressor();
return decomp.get();
}
}
TEST_F(BlockPerKVChecksumTest, EmptyBlock) {
BlockBuilder builder(
16 , true ,
false ,
BlockBasedTableOptions::DataBlockIndexType::kDataBlockBinarySearch);
Slice raw_block = builder.Finish();
BlockContents contents;
contents.data = raw_block;
std::unique_ptr<Block_kData> data_block;
Options options = Options();
uint8_t protection_bytes_per_key = 8;
BlockCreateContext create_context{
kTableOptions(), nullptr,
nullptr , kDecompressor(),
protection_bytes_per_key, options.comparator};
create_context.Create(&data_block, std::move(contents));
std::unique_ptr<DataBlockIter> biter{data_block->NewDataIterator(
options.comparator, kDisableGlobalSequenceNumber)};
biter->SeekToFirst();
ASSERT_FALSE(biter->Valid());
ASSERT_OK(biter->status());
Random rnd(33);
biter->SeekForGet(GenerateInternalKey(1, 1, 10, &rnd));
ASSERT_FALSE(biter->Valid());
ASSERT_OK(biter->status());
biter->SeekToLast();
ASSERT_FALSE(biter->Valid());
ASSERT_OK(biter->status());
biter->Seek(GenerateInternalKey(1, 1, 10, &rnd));
ASSERT_FALSE(biter->Valid());
ASSERT_OK(biter->status());
biter->SeekForPrev(GenerateInternalKey(1, 1, 10, &rnd));
ASSERT_FALSE(biter->Valid());
ASSERT_OK(biter->status());
}
TEST_F(BlockPerKVChecksumTest, UnsupportedOptionValue) {
Options options = Options();
options.block_protection_bytes_per_key = 128;
Destroy(options);
ASSERT_TRUE(TryReopen(options).IsNotSupported());
}
TEST_F(BlockPerKVChecksumTest, InitializeProtectionInfo) {
Options options = Options();
uint8_t protection_bytes_per_key = 8;
BlockCreateContext create_context{
kTableOptions(), nullptr , nullptr ,
kDecompressor(), protection_bytes_per_key, options.comparator};
{
std::string invalid_content = "1";
Slice raw_block = invalid_content;
BlockContents contents;
contents.data = raw_block;
std::unique_ptr<Block_kData> data_block;
create_context.Create(&data_block, std::move(contents));
std::unique_ptr<DataBlockIter> iter{data_block->NewDataIterator(
options.comparator, kDisableGlobalSequenceNumber)};
ASSERT_TRUE(iter->status().IsCorruption());
}
{
std::string invalid_content = "1";
Slice raw_block = invalid_content;
BlockContents contents;
contents.data = raw_block;
std::unique_ptr<Block_kIndex> index_block;
create_context.Create(&index_block, std::move(contents));
std::unique_ptr<IndexBlockIter> iter{index_block->NewIndexIterator(
options.comparator, kDisableGlobalSequenceNumber, nullptr, nullptr,
true, false, true, true)};
ASSERT_TRUE(iter->status().IsCorruption());
}
{
std::string invalid_content = "1";
Slice raw_block = invalid_content;
BlockContents contents;
contents.data = raw_block;
std::unique_ptr<Block_kMetaIndex> meta_block;
create_context.Create(&meta_block, std::move(contents));
std::unique_ptr<MetaBlockIter> iter{meta_block->NewMetaIterator(true)};
ASSERT_TRUE(iter->status().IsCorruption());
}
}
TEST_F(BlockPerKVChecksumTest, ApproximateMemory) {
const int kNumRecords = 20;
std::vector<std::string> keys;
std::vector<std::string> values;
GenerateRandomKVs(&keys, &values, 0, kNumRecords, 1 ,
24 );
std::unique_ptr<BlockBuilder> builder;
auto generate_block_content = [&]() {
builder = std::make_unique<BlockBuilder>(16 );
for (int i = 0; i < kNumRecords; ++i) {
builder->Add(keys[i], values[i]);
}
Slice raw_block = builder->Finish();
BlockContents contents;
contents.data = raw_block;
return contents;
};
Options options = Options();
uint8_t protection_bytes_per_key = 8;
BlockCreateContext with_checksum_create_context{
kTableOptions(),
nullptr ,
nullptr ,
kDecompressor(),
protection_bytes_per_key,
options.comparator,
true };
BlockCreateContext create_context{kTableOptions(),
nullptr ,
nullptr ,
kDecompressor(),
0,
options.comparator,
true };
{
std::unique_ptr<Block_kData> data_block;
create_context.Create(&data_block, generate_block_content());
size_t block_memory = data_block->ApproximateMemoryUsage();
std::unique_ptr<Block_kData> with_checksum_data_block;
with_checksum_create_context.Create(&with_checksum_data_block,
generate_block_content());
ASSERT_GT(with_checksum_data_block->ApproximateMemoryUsage() - block_memory,
100);
}
{
std::unique_ptr<Block_kData> meta_block;
create_context.Create(&meta_block, generate_block_content());
size_t block_memory = meta_block->ApproximateMemoryUsage();
std::unique_ptr<Block_kData> with_checksum_meta_block;
with_checksum_create_context.Create(&with_checksum_meta_block,
generate_block_content());
ASSERT_GT(with_checksum_meta_block->ApproximateMemoryUsage() - block_memory,
100);
}
{
std::vector<std::string> separators;
std::vector<BlockHandle> block_handles;
std::vector<std::string> first_keys;
GenerateRandomIndexEntries(&separators, &block_handles, &first_keys,
kNumRecords);
auto generate_index_content = [&]() {
builder = std::make_unique<BlockBuilder>(16 );
BlockHandle last_encoded_handle;
for (int i = 0; i < kNumRecords; ++i) {
IndexValue entry(block_handles[i], first_keys[i]);
std::string encoded_entry;
std::string delta_encoded_entry;
entry.EncodeTo(&encoded_entry, false, nullptr);
last_encoded_handle = entry.handle;
const Slice delta_encoded_entry_slice(delta_encoded_entry);
builder->Add(separators[i], encoded_entry, &delta_encoded_entry_slice);
}
Slice raw_block = builder->Finish();
BlockContents contents;
contents.data = raw_block;
return contents;
};
std::unique_ptr<Block_kIndex> index_block;
create_context.Create(&index_block, generate_index_content());
size_t block_memory = index_block->ApproximateMemoryUsage();
std::unique_ptr<Block_kIndex> with_checksum_index_block;
with_checksum_create_context.Create(&with_checksum_index_block,
generate_index_content());
ASSERT_GT(
with_checksum_index_block->ApproximateMemoryUsage() - block_memory,
100);
}
}
std::string GetDataBlockIndexTypeStr(
BlockBasedTableOptions::DataBlockIndexType t) {
return t == BlockBasedTableOptions::DataBlockIndexType::kDataBlockBinarySearch
? "BinarySearch"
: "BinaryAndHash";
}
class DataBlockKVChecksumTest
: public BlockPerKVChecksumTest,
public testing::WithParamInterface<std::tuple<
BlockBasedTableOptions::DataBlockIndexType,
uint8_t ,
uint32_t , bool >> {
public:
DataBlockKVChecksumTest() = default;
BlockBasedTableOptions::DataBlockIndexType GetDataBlockIndexType() const {
return std::get<0>(GetParam());
}
uint8_t GetChecksumLen() const { return std::get<1>(GetParam()); }
uint32_t GetRestartInterval() const { return std::get<2>(GetParam()); }
bool GetUseDeltaEncoding() const { return std::get<3>(GetParam()); }
std::unique_ptr<Block_kData> GenerateDataBlock(
std::vector<std::string>& keys, std::vector<std::string>& values,
int num_record) {
BlockCreateContext create_context{
kTableOptions(), nullptr , nullptr ,
kDecompressor(), GetChecksumLen(), Options().comparator};
builder_ = std::make_unique<BlockBuilder>(
static_cast<int>(GetRestartInterval()),
GetUseDeltaEncoding() ,
false , GetDataBlockIndexType());
for (int i = 0; i < num_record; i++) {
builder_->Add(keys[i], values[i]);
}
Slice raw_block = builder_->Finish();
BlockContents contents;
contents.data = raw_block;
std::unique_ptr<Block_kData> data_block;
create_context.Create(&data_block, std::move(contents));
return data_block;
}
std::unique_ptr<BlockBuilder> builder_;
};
INSTANTIATE_TEST_CASE_P(
P, DataBlockKVChecksumTest,
::testing::Combine(
::testing::Values(
BlockBasedTableOptions::DataBlockIndexType::kDataBlockBinarySearch,
BlockBasedTableOptions::DataBlockIndexType::
kDataBlockBinaryAndHash),
::testing::Values(0, 1, 2, 4, 8) ,
::testing::Values(1, 2, 3, 8, 16) ,
::testing::Values(false, true)) ,
[](const testing::TestParamInfo<
std::tuple<BlockBasedTableOptions::DataBlockIndexType, uint8_t,
uint32_t, bool>>& args) {
std::ostringstream oss;
oss << GetDataBlockIndexTypeStr(std::get<0>(args.param))
<< "ProtectionPerKey" << std::to_string(std::get<1>(args.param))
<< "RestartInterval" << std::to_string(std::get<2>(args.param))
<< "DeltaEncode" << std::to_string(std::get<3>(args.param));
return oss.str();
});
TEST_P(DataBlockKVChecksumTest, ChecksumConstructionAndVerification) {
uint8_t protection_bytes_per_key = GetChecksumLen();
std::vector<int> num_restart_intervals = {1, 16};
for (const auto num_restart_interval : num_restart_intervals) {
const int kNumRecords =
num_restart_interval * static_cast<int>(GetRestartInterval());
std::vector<std::string> keys;
std::vector<std::string> values;
GenerateRandomKVs(&keys, &values, 0, kNumRecords + 1, 1 ,
24 );
SyncPoint::GetInstance()->DisableProcessing();
std::unique_ptr<Block_kData> data_block =
GenerateDataBlock(keys, values, kNumRecords);
const char* checksum_ptr = data_block->TEST_GetKVChecksum();
for (int i = 0; i < kNumRecords; i++) {
ASSERT_TRUE(VerifyChecksum(protection_bytes_per_key,
checksum_ptr + i * protection_bytes_per_key,
keys[i], values[i]));
}
std::vector<SequenceNumber> seqnos{kDisableGlobalSequenceNumber, 0};
size_t verification_count = 0;
SyncPoint::GetInstance()->SetCallBack(
"Block::VerifyChecksum::checksum_len",
[&verification_count, protection_bytes_per_key](void* checksum_len) {
ASSERT_EQ((*static_cast<uint8_t*>(checksum_len)),
protection_bytes_per_key);
++verification_count;
});
SyncPoint::GetInstance()->EnableProcessing();
for (const auto seqno : seqnos) {
std::unique_ptr<DataBlockIter> biter{
data_block->NewDataIterator(Options().comparator, seqno)};
biter->SeekForGet(keys[kNumRecords]);
TestIterateForward(biter, verification_count);
verification_count = 0;
biter->SeekForGet(keys[kNumRecords / 2]);
ASSERT_GE(verification_count, 1);
TestIterateForward(biter, verification_count);
TestSeekToFirst(biter, verification_count);
TestSeekToLast(biter, verification_count);
TestSeekForPrev(biter, verification_count, keys[kNumRecords / 2]);
TestSeek(biter, verification_count, keys[kNumRecords / 2]);
}
}
}
class IndexBlockKVChecksumTest
: public BlockPerKVChecksumTest,
public testing::WithParamInterface<
std::tuple<BlockBasedTableOptions::DataBlockIndexType, uint8_t,
uint32_t, bool, bool>> {
public:
IndexBlockKVChecksumTest() = default;
BlockBasedTableOptions::DataBlockIndexType GetDataBlockIndexType() const {
return std::get<0>(GetParam());
}
uint8_t GetChecksumLen() const { return std::get<1>(GetParam()); }
uint32_t GetRestartInterval() const { return std::get<2>(GetParam()); }
bool UseValueDeltaEncoding() const { return std::get<3>(GetParam()); }
bool IncludeFirstKey() const { return std::get<4>(GetParam()); }
std::unique_ptr<Block_kIndex> GenerateIndexBlock(
std::vector<std::string>& separators,
std::vector<BlockHandle>& block_handles,
std::vector<std::string>& first_keys, int num_record) {
Options options = Options();
uint8_t protection_bytes_per_key = GetChecksumLen();
BlockCreateContext create_context{
kTableOptions(),
nullptr ,
nullptr ,
kDecompressor(),
protection_bytes_per_key,
options.comparator,
!UseValueDeltaEncoding() ,
IncludeFirstKey()};
builder_ = std::make_unique<BlockBuilder>(
static_cast<int>(GetRestartInterval()), true ,
UseValueDeltaEncoding() ,
GetDataBlockIndexType());
BlockHandle last_encoded_handle;
for (int i = 0; i < num_record; i++) {
IndexValue entry(block_handles[i], first_keys[i]);
std::string encoded_entry;
std::string delta_encoded_entry;
entry.EncodeTo(&encoded_entry, IncludeFirstKey(), nullptr);
if (UseValueDeltaEncoding() && i > 0) {
entry.EncodeTo(&delta_encoded_entry, IncludeFirstKey(),
&last_encoded_handle);
}
last_encoded_handle = entry.handle;
const Slice delta_encoded_entry_slice(delta_encoded_entry);
builder_->Add(separators[i], encoded_entry, &delta_encoded_entry_slice);
}
Slice raw_block = builder_->Finish();
BlockContents contents;
contents.data = raw_block;
std::unique_ptr<Block_kIndex> index_block;
create_context.Create(&index_block, std::move(contents));
return index_block;
}
std::unique_ptr<BlockBuilder> builder_;
};
INSTANTIATE_TEST_CASE_P(
P, IndexBlockKVChecksumTest,
::testing::Combine(
::testing::Values(
BlockBasedTableOptions::DataBlockIndexType::kDataBlockBinarySearch,
BlockBasedTableOptions::DataBlockIndexType::
kDataBlockBinaryAndHash),
::testing::Values(0, 1, 2, 4, 8), ::testing::Values(1, 3, 8, 16),
::testing::Values(true, false), ::testing::Values(true, false)),
[](const testing::TestParamInfo<
std::tuple<BlockBasedTableOptions::DataBlockIndexType, uint8_t,
uint32_t, bool, bool>>& args) {
std::ostringstream oss;
oss << GetDataBlockIndexTypeStr(std::get<0>(args.param)) << "ProtBytes"
<< std::to_string(std::get<1>(args.param)) << "RestartInterval"
<< std::to_string(std::get<2>(args.param)) << "ValueDeltaEncode"
<< std::to_string(std::get<3>(args.param)) << "IncludeFirstKey"
<< std::to_string(std::get<4>(args.param));
return oss.str();
});
TEST_P(IndexBlockKVChecksumTest, ChecksumConstructionAndVerification) {
Options options = Options();
uint8_t protection_bytes_per_key = GetChecksumLen();
std::vector<int> num_restart_intervals = {1, 16};
std::vector<SequenceNumber> seqnos{kDisableGlobalSequenceNumber, 10001};
for (const auto num_restart_interval : num_restart_intervals) {
const int kNumRecords =
num_restart_interval * static_cast<int>(GetRestartInterval());
for (const auto seqno : seqnos) {
std::vector<std::string> separators;
std::vector<BlockHandle> block_handles;
std::vector<std::string> first_keys;
GenerateRandomIndexEntries(&separators, &block_handles, &first_keys,
kNumRecords, 0 );
SyncPoint::GetInstance()->DisableProcessing();
std::unique_ptr<Block_kIndex> index_block = GenerateIndexBlock(
separators, block_handles, first_keys, kNumRecords);
IndexBlockIter* kNullIter = nullptr;
Statistics* kNullStats = nullptr;
std::unique_ptr<IndexBlockIter> biter{index_block->NewIndexIterator(
options.comparator, seqno, kNullIter, kNullStats,
true , IncludeFirstKey() ,
true ,
!UseValueDeltaEncoding() ,
true ,
true ,
nullptr )};
biter->SeekToFirst();
const char* checksum_ptr = index_block->TEST_GetKVChecksum();
for (int i = 0; i < kNumRecords; i++) {
ASSERT_TRUE(VerifyChecksum(protection_bytes_per_key, checksum_ptr,
biter->key(), biter->raw_value()));
}
size_t verification_count = 0;
SyncPoint::GetInstance()->SetCallBack(
"Block::VerifyChecksum::checksum_len",
[&verification_count, protection_bytes_per_key](void* checksum_len) {
ASSERT_EQ((*static_cast<uint8_t*>(checksum_len)),
protection_bytes_per_key);
++verification_count;
});
SyncPoint::GetInstance()->EnableProcessing();
TestSeekToFirst(biter, verification_count);
TestSeekToLast(biter, verification_count);
TestSeek(biter, verification_count, first_keys[kNumRecords / 2]);
}
}
}
class MetaIndexBlockKVChecksumTest
: public BlockPerKVChecksumTest,
public testing::WithParamInterface<
uint8_t > {
public:
MetaIndexBlockKVChecksumTest() = default;
uint8_t GetChecksumLen() const { return GetParam(); }
uint32_t GetRestartInterval() const { return 1; }
std::unique_ptr<Block_kMetaIndex> GenerateMetaIndexBlock(
std::vector<std::string>& keys, std::vector<std::string>& values,
int num_record) {
Options options = Options();
uint8_t protection_bytes_per_key = GetChecksumLen();
BlockCreateContext create_context{
kTableOptions(), nullptr , nullptr ,
kDecompressor(), protection_bytes_per_key, options.comparator};
builder_ =
std::make_unique<BlockBuilder>(static_cast<int>(GetRestartInterval()));
for (int i = 0; i < num_record; i++) {
builder_->Add(keys[i], values[i]);
}
Slice raw_block = builder_->Finish();
BlockContents contents;
contents.data = raw_block;
std::unique_ptr<Block_kMetaIndex> meta_block;
create_context.Create(&meta_block, std::move(contents));
return meta_block;
}
std::unique_ptr<BlockBuilder> builder_;
};
INSTANTIATE_TEST_CASE_P(P, MetaIndexBlockKVChecksumTest,
::testing::Values(0, 1, 2, 4, 8),
[](const testing::TestParamInfo<uint8_t>& args) {
std::ostringstream oss;
oss << "ProtBytes" << std::to_string(args.param);
return oss.str();
});
TEST_P(MetaIndexBlockKVChecksumTest, ChecksumConstructionAndVerification) {
Options options = Options();
uint8_t protection_bytes_per_key = GetChecksumLen();
BlockCreateContext create_context{
kTableOptions(), nullptr , nullptr ,
kDecompressor(), protection_bytes_per_key, options.comparator};
std::vector<int> num_restart_intervals = {1, 16};
for (const auto num_restart_interval : num_restart_intervals) {
const int kNumRecords = num_restart_interval * GetRestartInterval();
std::vector<std::string> keys;
std::vector<std::string> values;
GenerateRandomKVs(&keys, &values, 0, kNumRecords + 1, 1 ,
24 );
SyncPoint::GetInstance()->DisableProcessing();
std::unique_ptr<Block_kMetaIndex> meta_block =
GenerateMetaIndexBlock(keys, values, kNumRecords);
const char* checksum_ptr = meta_block->TEST_GetKVChecksum();
for (int i = 0; i < kNumRecords; i++) {
ASSERT_TRUE(VerifyChecksum(protection_bytes_per_key,
checksum_ptr + i * protection_bytes_per_key,
keys[i], values[i]));
}
size_t verification_count = 0;
SyncPoint::GetInstance()->SetCallBack(
"Block::VerifyChecksum::checksum_len",
[&verification_count, protection_bytes_per_key](void* checksum_len) {
ASSERT_EQ((*static_cast<uint8_t*>(checksum_len)),
protection_bytes_per_key);
++verification_count;
});
SyncPoint::GetInstance()->EnableProcessing();
std::unique_ptr<MetaBlockIter> biter{
meta_block->NewMetaIterator(true )};
TestSeekToFirst(biter, verification_count);
TestSeekToLast(biter, verification_count);
TestSeek(biter, verification_count, keys[kNumRecords / 2]);
TestSeekForPrev(biter, verification_count, keys[kNumRecords / 2]);
}
}
class DataBlockKVChecksumCorruptionTest : public DataBlockKVChecksumTest {
public:
DataBlockKVChecksumCorruptionTest() = default;
std::unique_ptr<DataBlockIter> GenerateDataBlockIter(
std::vector<std::string>& keys, std::vector<std::string>& values,
int num_record) {
SyncPoint::GetInstance()->DisableProcessing();
block_ = GenerateDataBlock(keys, values, num_record);
std::unique_ptr<DataBlockIter> biter{block_->NewDataIterator(
Options().comparator, kDisableGlobalSequenceNumber)};
SyncPoint::GetInstance()->EnableProcessing();
return biter;
}
protected:
std::unique_ptr<Block_kData> block_;
};
TEST_P(DataBlockKVChecksumCorruptionTest, CorruptEntry) {
std::vector<int> num_restart_intervals = {1, 3};
for (const auto num_restart_interval : num_restart_intervals) {
const int kNumRecords =
num_restart_interval * static_cast<int>(GetRestartInterval());
std::vector<std::string> keys;
std::vector<std::string> values;
GenerateRandomKVs(&keys, &values, 0, kNumRecords + 1, 1 ,
24 );
SyncPoint::GetInstance()->SetCallBack(
"BlockIter::UpdateKey::value", [](void* arg) {
char* value = static_cast<char*>(arg);
++value[10];
});
typedef std::unique_ptr<DataBlockIter> IterPtr;
typedef void(IterAPI)(IterPtr & iter, std::string&);
std::string seek_key = keys[kNumRecords / 2];
auto test_seek = [&](IterAPI iter_api) {
IterPtr biter = GenerateDataBlockIter(keys, values, kNumRecords);
ASSERT_OK(biter->status());
iter_api(biter, seek_key);
ASSERT_FALSE(biter->Valid());
ASSERT_TRUE(biter->status().IsCorruption());
};
test_seek([](IterPtr& iter, std::string&) { iter->SeekToFirst(); });
test_seek([](IterPtr& iter, std::string&) { iter->SeekToLast(); });
test_seek([](IterPtr& iter, std::string& k) { iter->Seek(k); });
test_seek([](IterPtr& iter, std::string& k) { iter->SeekForPrev(k); });
test_seek([](IterPtr& iter, std::string& k) { iter->SeekForGet(k); });
typedef void (DataBlockIter::*IterStepAPI)();
auto test_step = [&](IterStepAPI iter_api, std::string& k) {
IterPtr biter = GenerateDataBlockIter(keys, values, kNumRecords);
SyncPoint::GetInstance()->DisableProcessing();
biter->Seek(k);
ASSERT_TRUE(biter->Valid());
ASSERT_OK(biter->status());
SyncPoint::GetInstance()->EnableProcessing();
std::invoke(iter_api, biter);
ASSERT_FALSE(biter->Valid());
ASSERT_TRUE(biter->status().IsCorruption());
};
if (kNumRecords > 1) {
test_step(&DataBlockIter::Prev, seek_key);
test_step(&DataBlockIter::Next, seek_key);
}
}
}
INSTANTIATE_TEST_CASE_P(
P, DataBlockKVChecksumCorruptionTest,
::testing::Combine(
::testing::Values(
BlockBasedTableOptions::DataBlockIndexType::kDataBlockBinarySearch,
BlockBasedTableOptions::DataBlockIndexType::
kDataBlockBinaryAndHash),
::testing::Values(4, 8) ,
::testing::Values(1, 3, 8, 16) ,
::testing::Values(false, true)),
[](const testing::TestParamInfo<
std::tuple<BlockBasedTableOptions::DataBlockIndexType, uint8_t,
uint32_t, bool>>& args) {
std::ostringstream oss;
oss << GetDataBlockIndexTypeStr(std::get<0>(args.param)) << "ProtBytes"
<< std::to_string(std::get<1>(args.param)) << "RestartInterval"
<< std::to_string(std::get<2>(args.param)) << "DeltaEncode"
<< std::to_string(std::get<3>(args.param));
return oss.str();
});
class IndexBlockKVChecksumCorruptionTest : public IndexBlockKVChecksumTest {
public:
IndexBlockKVChecksumCorruptionTest() = default;
std::unique_ptr<IndexBlockIter> GenerateIndexBlockIter(
std::vector<std::string>& separators,
std::vector<BlockHandle>& block_handles,
std::vector<std::string>& first_keys, int num_record,
SequenceNumber seqno) {
SyncPoint::GetInstance()->DisableProcessing();
block_ =
GenerateIndexBlock(separators, block_handles, first_keys, num_record);
std::unique_ptr<IndexBlockIter> biter{block_->NewIndexIterator(
Options().comparator, seqno, nullptr, nullptr,
true , IncludeFirstKey() ,
true ,
!UseValueDeltaEncoding() ,
true ,
true ,
nullptr )};
SyncPoint::GetInstance()->EnableProcessing();
return biter;
}
protected:
std::unique_ptr<Block_kIndex> block_;
};
INSTANTIATE_TEST_CASE_P(
P, IndexBlockKVChecksumCorruptionTest,
::testing::Combine(
::testing::Values(
BlockBasedTableOptions::DataBlockIndexType::kDataBlockBinarySearch,
BlockBasedTableOptions::DataBlockIndexType::
kDataBlockBinaryAndHash),
::testing::Values(4, 8) ,
::testing::Values(1, 3, 8, 16) ,
::testing::Values(true, false), ::testing::Values(true, false)),
[](const testing::TestParamInfo<
std::tuple<BlockBasedTableOptions::DataBlockIndexType, uint8_t,
uint32_t, bool, bool>>& args) {
std::ostringstream oss;
oss << GetDataBlockIndexTypeStr(std::get<0>(args.param)) << "ProtBytes"
<< std::to_string(std::get<1>(args.param)) << "RestartInterval"
<< std::to_string(std::get<2>(args.param)) << "ValueDeltaEncode"
<< std::to_string(std::get<3>(args.param)) << "IncludeFirstKey"
<< std::to_string(std::get<4>(args.param));
return oss.str();
});
TEST_P(IndexBlockKVChecksumCorruptionTest, CorruptEntry) {
std::vector<int> num_restart_intervals = {1, 3};
std::vector<SequenceNumber> seqnos{kDisableGlobalSequenceNumber, 10001};
for (const auto num_restart_interval : num_restart_intervals) {
const int kNumRecords =
num_restart_interval * static_cast<int>(GetRestartInterval());
for (const auto seqno : seqnos) {
std::vector<std::string> separators;
std::vector<BlockHandle> block_handles;
std::vector<std::string> first_keys;
GenerateRandomIndexEntries(&separators, &block_handles, &first_keys,
kNumRecords, 0 );
SyncPoint::GetInstance()->SetCallBack(
"BlockIter::UpdateKey::value", [](void* arg) {
char* value = static_cast<char*>(arg);
++value[0];
});
typedef std::unique_ptr<IndexBlockIter> IterPtr;
typedef void(IterAPI)(IterPtr & iter, std::string&);
std::string seek_key = first_keys[kNumRecords / 2];
auto test_seek = [&](IterAPI iter_api) {
std::unique_ptr<IndexBlockIter> biter = GenerateIndexBlockIter(
separators, block_handles, first_keys, kNumRecords, seqno);
ASSERT_OK(biter->status());
iter_api(biter, seek_key);
ASSERT_FALSE(biter->Valid());
ASSERT_TRUE(biter->status().IsCorruption());
};
test_seek([](IterPtr& iter, std::string&) { iter->SeekToFirst(); });
test_seek([](IterPtr& iter, std::string&) { iter->SeekToLast(); });
test_seek([](IterPtr& iter, std::string& k) { iter->Seek(k); });
typedef void (IndexBlockIter::*IterStepAPI)();
auto test_step = [&](IterStepAPI iter_api, std::string& k) {
std::unique_ptr<IndexBlockIter> biter = GenerateIndexBlockIter(
separators, block_handles, first_keys, kNumRecords, seqno);
SyncPoint::GetInstance()->DisableProcessing();
biter->Seek(k);
ASSERT_TRUE(biter->Valid());
ASSERT_OK(biter->status());
SyncPoint::GetInstance()->EnableProcessing();
std::invoke(iter_api, biter);
ASSERT_FALSE(biter->Valid());
ASSERT_TRUE(biter->status().IsCorruption());
};
if (kNumRecords > 1) {
test_step(&IndexBlockIter::Prev, seek_key);
test_step(&IndexBlockIter::Next, seek_key);
}
}
}
}
class MetaIndexBlockKVChecksumCorruptionTest
: public MetaIndexBlockKVChecksumTest {
public:
MetaIndexBlockKVChecksumCorruptionTest() = default;
std::unique_ptr<MetaBlockIter> GenerateMetaIndexBlockIter(
std::vector<std::string>& keys, std::vector<std::string>& values,
int num_record) {
SyncPoint::GetInstance()->DisableProcessing();
block_ = GenerateMetaIndexBlock(keys, values, num_record);
std::unique_ptr<MetaBlockIter> biter{
block_->NewMetaIterator(true )};
SyncPoint::GetInstance()->EnableProcessing();
return biter;
}
protected:
std::unique_ptr<Block_kMetaIndex> block_;
};
INSTANTIATE_TEST_CASE_P(
P, MetaIndexBlockKVChecksumCorruptionTest,
::testing::Values(4, 8) ,
[](const testing::TestParamInfo<uint8_t>& args) {
std::ostringstream oss;
oss << "ProtBytes" << std::to_string(args.param);
return oss.str();
});
TEST_P(MetaIndexBlockKVChecksumCorruptionTest, CorruptEntry) {
Options options = Options();
std::vector<int> num_restart_intervals = {1, 3};
for (const auto num_restart_interval : num_restart_intervals) {
const int kNumRecords =
num_restart_interval * static_cast<int>(GetRestartInterval());
std::vector<std::string> keys;
std::vector<std::string> values;
GenerateRandomKVs(&keys, &values, 0, kNumRecords + 1, 1 ,
24 );
SyncPoint::GetInstance()->SetCallBack(
"BlockIter::UpdateKey::value", [](void* arg) {
char* value = static_cast<char*>(arg);
++value[10];
});
typedef std::unique_ptr<MetaBlockIter> IterPtr;
typedef void(IterAPI)(IterPtr & iter, std::string&);
typedef void (MetaBlockIter::*IterStepAPI)();
std::string seek_key = keys[kNumRecords / 2];
auto test_seek = [&](IterAPI iter_api) {
IterPtr biter = GenerateMetaIndexBlockIter(keys, values, kNumRecords);
ASSERT_OK(biter->status());
iter_api(biter, seek_key);
ASSERT_FALSE(biter->Valid());
ASSERT_TRUE(biter->status().IsCorruption());
};
test_seek([](IterPtr& iter, std::string&) { iter->SeekToFirst(); });
test_seek([](IterPtr& iter, std::string&) { iter->SeekToLast(); });
test_seek([](IterPtr& iter, std::string& k) { iter->Seek(k); });
test_seek([](IterPtr& iter, std::string& k) { iter->SeekForPrev(k); });
auto test_step = [&](IterStepAPI iter_api, const std::string& k) {
IterPtr biter = GenerateMetaIndexBlockIter(keys, values, kNumRecords);
SyncPoint::GetInstance()->DisableProcessing();
biter->Seek(k);
ASSERT_TRUE(biter->Valid());
ASSERT_OK(biter->status());
SyncPoint::GetInstance()->EnableProcessing();
std::invoke(iter_api, biter);
ASSERT_FALSE(biter->Valid());
ASSERT_TRUE(biter->status().IsCorruption());
};
if (kNumRecords > 1) {
test_step(&MetaBlockIter::Prev, seek_key);
test_step(&MetaBlockIter::Next, seek_key);
}
}
}
class MetaBlockEntryCorruptionTest : public testing::TestWithParam<bool> {
public:
bool useSeparatedKVStorage() const { return GetParam(); }
std::string BuildBlock() {
BlockBuilder builder(1 ,
true ,
false ,
BlockBasedTableOptions::kDataBlockBinarySearch,
0 ,
0 , false ,
true , useSeparatedKVStorage());
builder.Add("key001", "val01");
builder.Add("key002", "val02");
builder.Add("key003", "val03");
builder.Add("key004", "val04");
Slice raw = builder.Finish();
return std::string(raw.data(), raw.size());
}
uint32_t GetRestartOffset(const std::string& block_data, int restart_idx) {
size_t footer_size = useSeparatedKVStorage() ? 8 : 4;
uint32_t packed = DecodeFixed32(block_data.data() + block_data.size() - 4);
uint32_t num_restarts = packed & DataBlockFooter::kMaxNumRestarts;
size_t restarts_start =
block_data.size() - footer_size - num_restarts * sizeof(uint32_t);
return DecodeFixed32(block_data.data() + restarts_start +
restart_idx * sizeof(uint32_t));
}
uint32_t GetKeyEnd(const std::string& block_data) {
size_t footer_size = useSeparatedKVStorage() ? 8 : 4;
uint32_t packed = DecodeFixed32(block_data.data() + block_data.size() - 4);
uint32_t num_restarts = packed & DataBlockFooter::kMaxNumRestarts;
uint32_t restarts_start = static_cast<uint32_t>(
block_data.size() - footer_size - num_restarts * sizeof(uint32_t));
if (useSeparatedKVStorage()) {
return DecodeFixed32(block_data.data() + block_data.size() - 8);
}
return restarts_start;
}
uint32_t GetValueEnd(const std::string& block_data) {
size_t footer_size = useSeparatedKVStorage() ? 8 : 4;
uint32_t packed = DecodeFixed32(block_data.data() + block_data.size() - 4);
uint32_t num_restarts = packed & DataBlockFooter::kMaxNumRestarts;
return static_cast<uint32_t>(block_data.size() - footer_size -
num_restarts * sizeof(uint32_t));
}
};
INSTANTIATE_TEST_CASE_P(P, MetaBlockEntryCorruptionTest, ::testing::Bool(),
[](const testing::TestParamInfo<bool>& args) {
return args.param ? "SeparatedKV" : "InlineKV";
});
TEST_P(MetaBlockEntryCorruptionTest, CorruptedKeyLengthPastKeyEnd) {
std::string block_data = BuildBlock();
uint32_t key_end = GetKeyEnd(block_data);
uint32_t first_entry_offset = GetRestartOffset(block_data, 0);
block_data[first_entry_offset + 1] = static_cast<char>(key_end);
BlockContents contents;
contents.data = Slice(block_data);
Block block(std::move(contents), 0, nullptr, 1 );
std::unique_ptr<MetaBlockIter> iter(block.NewMetaIterator());
iter->SeekToFirst();
ASSERT_FALSE(iter->Valid());
ASSERT_TRUE(iter->status().IsCorruption());
}
TEST_P(MetaBlockEntryCorruptionTest, CorruptedValueLengthPastValueEnd) {
std::string block_data = BuildBlock();
uint32_t value_end = GetValueEnd(block_data);
uint32_t first_entry_offset = GetRestartOffset(block_data, 0);
block_data[first_entry_offset + 2] = static_cast<char>(value_end);
BlockContents contents;
contents.data = Slice(block_data);
Block block(std::move(contents), 0, nullptr, 1 );
std::unique_ptr<MetaBlockIter> iter(block.NewMetaIterator());
iter->SeekToFirst();
ASSERT_FALSE(iter->Valid());
ASSERT_TRUE(iter->status().IsCorruption());
}
}
int main(int argc, char** argv) {
ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}