#include "table/block_based/data_block_hash_index.h"

#include <cstdlib>
#include <memory>
#include <string>
#include <unordered_map>

#include "db/table_properties_collector.h"
#include "rocksdb/slice.h"
#include "table/block_based/block.h"
#include "table/block_based/block_based_table_reader.h"
#include "table/block_based/block_builder.h"
#include "table/get_context.h"
#include "table/table_builder.h"
#include "test_util/testharness.h"
#include "test_util/testutil.h"
#include "util/random.h"
namespace ROCKSDB_NAMESPACE {
// Queries `index.Lookup(data, map_offset, key)` and reports whether the
// bucket it returns is compatible with `restart_point`.
//
// Returns true if the bucket is kCollision (treated as a possible match) or
// equals `restart_point`. Returns false if the bucket is kNoEntry or holds a
// different restart point. `restart_point` is only read, never written.
bool SearchForOffset(DataBlockHashIndex& index, const char* data,
                     uint16_t map_offset, const Slice& key,
                     uint8_t& restart_point) {
  const uint8_t bucket = index.Lookup(data, map_offset, key);

  // A collision bucket cannot rule the key out, so it counts as a hit.
  if (bucket == kCollision) {
    return true;
  }
  return bucket != kNoEntry && bucket == restart_point;
}
// Builds a key by formatting `primary_key` and `secondary_key` with
// "%6d%4d" (decimal fields of minimum width 6 and 4). When `padding_size` is
// non-zero, the result of `rnd->RandomString(padding_size)` is appended.
// `rnd` is only dereferenced when padding is requested.
std::string GenerateKey(int primary_key, int secondary_key, int padding_size,
                        Random* rnd) {
  char formatted[50];
  snprintf(formatted, sizeof(formatted), "%6d%4d", primary_key, secondary_key);

  std::string key(formatted);
  if (padding_size != 0) {
    key.append(rnd->RandomString(padding_size));
  }
  return key;
}
void GenerateRandomKVs(std::vector<std::string>* keys,
std::vector<std::string>* values, const int from,
const int len, const int step = 1,
const int padding_size = 0,
const int keys_share_prefix = 1) {
Random rnd(302);
for (int i = from; i < from + len; i += step) {
for (int j = 0; j < keys_share_prefix; ++j) {
keys->emplace_back(GenerateKey(i, j, padding_size, &rnd));
values->emplace_back(rnd.RandomString(100));
}
}
}
// Builds a small hash index five times with the same builder (2, 3, ..., 6
// keys per round, with Reset() between rounds) and checks, for each round:
//   * Finish() appends exactly EstimateSize() bytes to the buffer,
//   * the reported map offset equals the size of the pre-existing content,
//   * every key inserted in that round is found (or reported as a collision).
TEST(DataBlockHashIndex, DataBlockHashTestSmall) {
  DataBlockHashIndexBuilder builder;
  builder.Initialize(0.75);

  for (int j = 0; j < 5; j++) {
    const uint8_t num_keys = static_cast<uint8_t>(2 + j);

    for (uint8_t i = 0; i < num_keys; i++) {
      std::string key("key" + std::to_string(i));
      uint8_t restart_point = i;
      builder.Add(key, restart_point);
    }

    size_t estimated_size = builder.EstimateSize();

    std::string buffer("fake"), buffer2;
    size_t original_size = buffer.size();
    estimated_size += original_size;
    builder.Finish(buffer);

    ASSERT_EQ(buffer.size(), estimated_size);

    // Read the index back through a copy, so lookups use a different
    // address than the one the index was written to.
    buffer2 = buffer;

    Slice s(buffer2);
    DataBlockHashIndex index;
    uint16_t map_offset;
    index.Initialize(s.data(), static_cast<uint16_t>(s.size()), &map_offset);

    // The hash map must start right after the content that was already in
    // the buffer before Finish().
    ASSERT_EQ(original_size, map_offset);

    // Verify every key added in this round, not just the first two: a key
    // that was inserted must resolve to its restart point or to a collision.
    for (uint8_t i = 0; i < num_keys; i++) {
      std::string key("key" + std::to_string(i));
      uint8_t restart_point = i;
      ASSERT_TRUE(
          SearchForOffset(index, s.data(), map_offset, key, restart_point));
    }

    builder.Reset();
  }
}
// Inserts 100 keys ("key0".."key99", restart point == key number), appends
// the index to a buffer that already holds "fake content", and verifies the
// size estimate, the map offset, and that every key can be looked up.
TEST(DataBlockHashIndex, DataBlockHashTest) {
  constexpr uint8_t kNumKeys = 100;

  DataBlockHashIndexBuilder index_builder;
  index_builder.Initialize(0.75);

  for (uint8_t n = 0; n < kNumKeys; ++n) {
    const std::string user_key = "key" + std::to_string(n);
    const uint8_t restart = n;
    index_builder.Add(user_key, restart);
  }

  std::string block("fake content");
  const size_t prefix_len = block.size();
  const size_t expected_len = index_builder.EstimateSize() + prefix_len;
  index_builder.Finish(block);
  ASSERT_EQ(block.size(), expected_len);

  // Look the keys up through a copy of the finished buffer.
  const std::string block_copy = block;
  Slice s(block_copy);

  DataBlockHashIndex index;
  uint16_t map_offset;
  index.Initialize(s.data(), static_cast<uint16_t>(s.size()), &map_offset);

  // The map begins where the pre-existing content ended.
  ASSERT_EQ(prefix_len, map_offset);

  for (uint8_t n = 0; n < kNumKeys; ++n) {
    const std::string user_key = "key" + std::to_string(n);
    uint8_t expected_restart = n;
    ASSERT_TRUE(SearchForOffset(index, s.data(), map_offset, user_key,
                                expected_restart));
  }
}
// Same flow as DataBlockHashTest, but with a longer pre-existing buffer
// ("some other fake content to take up space"): 100 keys are added, the
// index is appended, and each key must be found or reported as a collision.
TEST(DataBlockHashIndex, DataBlockHashTestCollision) {
  const auto key_for = [](uint8_t n) { return "key" + std::to_string(n); };

  DataBlockHashIndexBuilder builder;
  builder.Initialize(0.75);

  uint8_t next = 0;
  while (next < 100) {
    const std::string key = key_for(next);
    const uint8_t restart_point = next;
    builder.Add(key, restart_point);
    ++next;
  }

  const size_t index_size = builder.EstimateSize();
  std::string buffer("some other fake content to take up space");
  const size_t original_size = buffer.size();
  builder.Finish(buffer);
  ASSERT_EQ(buffer.size(), index_size + original_size);

  // Read back from a copy of the buffer rather than the buffer itself.
  std::string buffer2(buffer);
  Slice s(buffer2);

  DataBlockHashIndex index;
  uint16_t map_offset;
  index.Initialize(s.data(), static_cast<uint16_t>(s.size()), &map_offset);
  ASSERT_EQ(original_size, map_offset);

  for (uint8_t probe = 0; probe < 100; ++probe) {
    const std::string key = key_for(probe);
    uint8_t restart_point = probe;
    ASSERT_TRUE(
        SearchForOffset(index, s.data(), map_offset, key, restart_point));
  }
}
// Inserts only the even-numbered keys among "key0".."key99", remembering
// each inserted key's restart point in a map, then probes all 100 keys.
// Inserted keys must be found; nothing is asserted about the others.
TEST(DataBlockHashIndex, DataBlockHashTestLarge) {
  DataBlockHashIndexBuilder builder;
  builder.Initialize(0.75);
  std::unordered_map<std::string, uint8_t> inserted;

  // Stepping by two leaves every odd key out of the index.
  for (uint8_t i = 0; i < 100; i += 2) {
    const std::string key = "key" + std::to_string(i);
    const uint8_t restart_point = i;
    builder.Add(key, restart_point);
    inserted[key] = restart_point;
  }

  std::string buffer("filling stuff");
  const size_t original_size = buffer.size();
  const size_t estimated_size = builder.EstimateSize() + original_size;
  builder.Finish(buffer);
  ASSERT_EQ(buffer.size(), estimated_size);

  // Look up via a copy of the finished buffer.
  const std::string buffer2 = buffer;
  Slice s(buffer2);

  DataBlockHashIndex index;
  uint16_t map_offset;
  index.Initialize(s.data(), static_cast<uint16_t>(s.size()), &map_offset);
  ASSERT_EQ(original_size, map_offset);

  for (uint8_t i = 0; i < 100; i++) {
    const std::string key = "key" + std::to_string(i);
    uint8_t restart_point = i;

    const auto found = inserted.find(key);
    if (found == inserted.end()) {
      // Key was never added: no expectation is checked here (presumably
      // because the index may legitimately report a false positive).
      continue;
    }
    ASSERT_TRUE(found->second == restart_point);
    ASSERT_TRUE(
        SearchForOffset(index, s.data(), map_offset, key, restart_point));
  }
}
// Checks the builder's validity flag around the largest supported restart
// index: adding restart points 0..253 keeps the builder valid, additionally
// adding 254 makes it invalid, and Reset() makes it valid again.
TEST(DataBlockHashIndex, RestartIndexExceedMax) {
  DataBlockHashIndexBuilder builder;
  builder.Initialize(0.75);

  // Restart indices up to and including 253 are accepted.
  for (uint8_t i = 0; i <= 253; i++) {
    std::string key = "key" + std::to_string(i);
    uint8_t restart_point = i;
    builder.Add(key, restart_point);
  }
  ASSERT_TRUE(builder.Valid());

  builder.Reset();

  // Going one further (restart index 254) must invalidate the builder.
  for (uint8_t i = 0; i <= 254; i++) {
    std::string key = "key" + std::to_string(i);
    uint8_t restart_point = i;
    builder.Add(key, restart_point);
  }
  ASSERT_FALSE(builder.Valid());

  // Reset() clears the invalid state.
  builder.Reset();
  ASSERT_TRUE(builder.Valid());
}
// Block-level counterpart of RestartIndexExceedMax. A BlockBuilder asked for
// kDataBlockBinaryAndHash must produce a block whose reader reports
//   * kDataBlockBinaryAndHash after 254 keys ("key0".."key253"), and
//   * kDataBlockBinarySearch after 255 keys ("key0".."key254").
// The builder is created with a first argument of 1 (presumably the restart
// interval, so that each key gets its own restart point; confirm against
// BlockBuilder's constructor).
TEST(DataBlockHashIndex, BlockRestartIndexExceedMax) {
  BlockBuilder builder(1, true, false,
                       BlockBasedTableOptions::kDataBlockBinaryAndHash);

  // 254 keys: the hash index is expected to be kept.
  for (int i = 0; i <= 253; i++) {
    std::string ukey = "key" + std::to_string(i);
    InternalKey ikey(ukey, 0, kTypeValue);
    builder.Add(ikey.Encode().ToString(), "value");
  }
  {
    // Parse the serialized block and check which index type it carries.
    Slice rawblock = builder.Finish();
    BlockContents contents;
    contents.data = rawblock;
    Block reader(std::move(contents));

    ASSERT_EQ(reader.IndexType(),
              BlockBasedTableOptions::kDataBlockBinaryAndHash);
  }

  builder.Reset();

  // 255 keys: the block is expected to fall back to binary search only.
  for (int i = 0; i <= 254; i++) {
    std::string ukey = "key" + std::to_string(i);
    InternalKey ikey(ukey, 0, kTypeValue);
    builder.Add(ikey.Encode().ToString(), "value");
  }
  {
    Slice rawblock = builder.Finish();
    BlockContents contents;
    contents.data = rawblock;
    Block reader(std::move(contents));

    ASSERT_EQ(reader.IndexType(),
              BlockBasedTableOptions::kDataBlockBinarySearch);
  }
}
// Checks the block-size boundary for the hash index using a single entry
// with a 10-byte user key:
//   * with a 65502-byte value the finished block must still report
//     kDataBlockBinaryAndHash,
//   * with a 65503-byte value it must report kDataBlockBinarySearch.
// In both cases the finished block must not exceed
// kMaxBlockSizeSupportedByHashIndex.
TEST(DataBlockHashIndex, BlockSizeExceedMax) {
  std::string ukey(10, 'k');
  InternalKey ikey(ukey, 0, kTypeValue);

  BlockBuilder builder(1, false, false,
                       BlockBasedTableOptions::kDataBlockBinaryAndHash);

  {
    // Largest value for which the hash index is expected to be present.
    std::string value(65502, 'v');
    builder.Add(ikey.Encode().ToString(), value);

    Slice rawblock = builder.Finish();
    ASSERT_LE(rawblock.size(), kMaxBlockSizeSupportedByHashIndex);
    std::cerr << "block size: " << rawblock.size() << std::endl;

    BlockContents contents;
    contents.data = rawblock;
    Block reader(std::move(contents));

    ASSERT_EQ(reader.IndexType(),
              BlockBasedTableOptions::kDataBlockBinaryAndHash);
  }

  builder.Reset();

  {
    // One byte more: the block is expected to be written without the hash
    // index, which also keeps it within the size limit asserted below.
    std::string value(65503, 'v');
    builder.Add(ikey.Encode().ToString(), value);

    Slice rawblock = builder.Finish();
    ASSERT_LE(rawblock.size(), kMaxBlockSizeSupportedByHashIndex);
    std::cerr << "block size: " << rawblock.size() << std::endl;

    BlockContents contents;
    contents.data = rawblock;
    Block reader(std::move(contents));

    ASSERT_EQ(reader.IndexType(),
              BlockBasedTableOptions::kDataBlockBinarySearch);
  }
}
// Builds a block holding the single entry "gopher"@10 -> "gold" with the
// hash index enabled, then exercises SeekForGet() with the same user key at
// three sequence numbers (10, 20 and 5).
//
// The iterator is held in a std::unique_ptr so that it is released even when
// a fatal ASSERT_* returns from the test early.
TEST(DataBlockHashIndex, BlockTestSingleKey) {
  Options options = Options();

  BlockBuilder builder(16, true, false,
                       BlockBasedTableOptions::kDataBlockBinaryAndHash);

  std::string ukey("gopher");
  std::string value("gold");
  InternalKey ikey(ukey, 10, kTypeValue);
  builder.Add(ikey.Encode().ToString(), value);

  // Parse the serialized block.
  Slice rawblock = builder.Finish();
  BlockContents contents;
  contents.data = rawblock;
  Block reader(std::move(contents));

  const InternalKeyComparator icmp(BytewiseComparator());
  std::unique_ptr<DataBlockIter> iter(reader.NewDataIterator(
      icmp.user_comparator(), kDisableGlobalSequenceNumber));
  bool may_exist;

  {
    // Seek with exactly the sequence number that was inserted: the iterator
    // must land on the inserted entry.
    InternalKey seek_ikey(ukey, 10, kValueTypeForSeek);
    may_exist = iter->SeekForGet(seek_ikey.Encode().ToString());
    ASSERT_TRUE(may_exist);
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ(
        options.comparator->Compare(iter->key(), ikey.Encode().ToString()), 0);
    ASSERT_EQ(iter->value(), value);
  }

  {
    // Seek with a higher sequence number (20): the entry must still be
    // found.
    InternalKey seek_ikey(ukey, 20, kValueTypeForSeek);
    may_exist = iter->SeekForGet(seek_ikey.Encode().ToString());
    ASSERT_TRUE(may_exist);
    ASSERT_TRUE(iter->Valid());

    // The user key matches...
    ASSERT_EQ(options.comparator->Compare(ExtractUserKey(iter->key()), ukey),
              0);
    // ...and the entry found has a lower sequence number than the target.
    ASSERT_GT(GetInternalKeySeqno(seek_ikey.Encode()),
              GetInternalKeySeqno(iter->key()));

    ASSERT_EQ(iter->value(), value);
  }

  {
    // Seek with a lower sequence number (5): the key "may exist", but the
    // iterator ends up invalid. Presumably the only entry (seqno 10) sorts
    // before the target, so the seek runs off the end of the block.
    InternalKey seek_ikey(ukey, 5, kValueTypeForSeek);
    may_exist = iter->SeekForGet(seek_ikey.Encode().ToString());
    ASSERT_TRUE(may_exist);
    ASSERT_FALSE(iter->Valid());
  }
}
// Builds a block of 500 generated entries whose user keys all end in "1",
// then performs 500 random seeks for keys that exist and 500 random seeks
// for keys that do not exist (same prefix, but ending in "0").
//
// Each iterator is held in a std::unique_ptr so that it is released even
// when a fatal ASSERT_* returns from the test early.
TEST(DataBlockHashIndex, BlockTestLarge) {
  Random rnd(1019);
  std::vector<std::string> keys;
  std::vector<std::string> values;

  BlockBuilder builder(16, true, false,
                       BlockBasedTableOptions::kDataBlockBinaryAndHash);
  int num_records = 500;

  GenerateRandomKVs(&keys, &values, 0, num_records);

  // Every stored user key gets the suffix "1"; the suffix "0" is used below
  // to form user keys that are guaranteed to be absent.
  for (int i = 0; i < num_records; i++) {
    std::string ukey(keys[i] + "1");
    InternalKey ikey(ukey, 0, kTypeValue);
    builder.Add(ikey.Encode().ToString(), values[i]);
  }

  // Parse the serialized block.
  Slice rawblock = builder.Finish();
  BlockContents contents;
  contents.data = rawblock;
  Block reader(std::move(contents));
  const InternalKeyComparator icmp(BytewiseComparator());

  // Random seeks for keys that exist: each must be found with its value.
  for (int i = 0; i < num_records; i++) {
    std::unique_ptr<DataBlockIter> iter(reader.NewDataIterator(
        icmp.user_comparator(), kDisableGlobalSequenceNumber));

    int index = rnd.Uniform(num_records);
    std::string ukey(keys[index] + "1");
    InternalKey ikey(ukey, 0, kTypeValue);

    bool may_exist = iter->SeekForGet(ikey.Encode().ToString());
    ASSERT_TRUE(may_exist);
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ(values[index], iter->value());
  }

  // Random seeks for keys that do not exist. The only outcome rejected is
  // "may_exist == false with an invalid iterator": either the key may still
  // exist, or the iterator must be positioned on some entry.
  for (int i = 0; i < num_records; i++) {
    std::unique_ptr<DataBlockIter> iter(reader.NewDataIterator(
        icmp.user_comparator(), kDisableGlobalSequenceNumber));

    int index = rnd.Uniform(num_records);
    std::string ukey(keys[index] + "0");
    InternalKey ikey(ukey, 0, kTypeValue);

    bool may_exist = iter->SeekForGet(ikey.Encode().ToString());
    ASSERT_TRUE(may_exist || iter->Valid());
  }
}
// Writes a two-entry table (ik1 -> v1, ik2 -> v2) into an in-memory sink
// using `options.table_factory`, reopens it from an in-memory source, and
// runs a single Get() for `seek_ikey`. The outcome of the lookup is reported
// through `get_context`.
//
// Failures while building are reported with EXPECT_*; failures to flush,
// open the table, or perform the Get() abort this helper via ASSERT_OK.
void TestBoundary(InternalKey& ik1, std::string& v1, InternalKey& ik2,
                  std::string& v2, InternalKey& seek_ikey,
                  GetContext& get_context, Options& options) {
  // Declared first so that they are destroyed last, after everything below.
  std::unique_ptr<WritableFileWriter> file_writer;
  std::unique_ptr<RandomAccessFileReader> file_reader;
  std::unique_ptr<TableReader> table_reader;
  int level = -1;

  const ImmutableOptions ioptions(options);
  const MutableCFOptions moptions(options);
  const InternalKeyComparator internal_comparator(options.comparator);

  EnvOptions soptions;
  soptions.use_mmap_reads = ioptions.allow_mmap_reads;

  // `sink` is owned by `f` (and then by `file_writer`); the raw pointer is
  // kept only to read back the bytes that were written.
  test::StringSink* sink = new test::StringSink();
  std::unique_ptr<FSWritableFile> f(sink);
  file_writer.reset(
      new WritableFileWriter(std::move(f), "", FileOptions()));

  std::unique_ptr<TableBuilder> builder;
  InternalTblPropCollFactories internal_tbl_prop_coll_factories;
  std::string column_family_name;
  const ReadOptions read_options;
  const WriteOptions write_options;
  builder.reset(moptions.table_factory->NewTableBuilder(
      TableBuilderOptions(
          ioptions, moptions, read_options, write_options, internal_comparator,
          &internal_tbl_prop_coll_factories, options.compression,
          CompressionOptions(),
          TablePropertiesCollectorFactory::Context::kUnknownColumnFamily,
          column_family_name, level, kUnknownNewestKeyTime),
      file_writer.get()));

  // Build the table with exactly the two supplied entries.
  builder->Add(ik1.Encode().ToString(), v1);
  builder->Add(ik2.Encode().ToString(), v2);
  EXPECT_TRUE(builder->status().ok());

  Status s = builder->Finish();
  ASSERT_OK(file_writer->Flush(IOOptions()));
  EXPECT_TRUE(s.ok()) << s.ToString();

  // Everything the builder produced must have reached the sink.
  EXPECT_EQ(sink->contents().size(), builder->FileSize());

  // Reopen the bytes just written as a readable table.
  test::StringSource* source = new test::StringSource(
      sink->contents(), 0, ioptions.allow_mmap_reads);
  std::unique_ptr<FSRandomAccessFile> file(source);
  file_reader.reset(new RandomAccessFileReader(std::move(file), "test"));

  // Both flags are passed negated, i.e. as false.
  const bool kSkipFilters = true;
  const bool kImmortal = true;
  ASSERT_OK(moptions.table_factory->NewTableReader(
      TableReaderOptions(ioptions, moptions.prefix_extractor,
                         nullptr, soptions,
                         internal_comparator,
                         0, !kSkipFilters,
                         !kImmortal, level),
      std::move(file_reader), sink->contents().size(), &table_reader));

  // Perform the lookup; the result lands in `get_context`.
  ReadOptions ro;
  ASSERT_OK(table_reader->Get(ro, seek_ikey.Encode().ToString(), &get_context,
                              moptions.prefix_extractor.get()));
}
// Exercises Get() on a two-entry table built with the hash index enabled,
// block_restart_interval = 1 and block_size = 4096. Both values are 4100
// bytes long (presumably so that each entry ends up in its own data block).
//
// The second entry is always "axy"@10 and the lookup is always for user key
// "axy"; the scenarios vary the first entry's user key and the sequence
// number used for the lookup.
TEST(DataBlockHashIndex, BlockBoundary) {
  BlockBasedTableOptions table_options;
  table_options.data_block_index_type =
      BlockBasedTableOptions::kDataBlockBinaryAndHash;
  table_options.block_restart_interval = 1;
  table_options.block_size = 4096;

  Options options;
  options.comparator = BytewiseComparator();
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));

  struct Scenario {
    const char* first_ukey;   // user key of the first entry (seqno 100)
    int seek_seqno;           // sequence number used for the lookup
    bool expect_found;        // expected final state: kFound vs. kNotFound
    bool expect_first_value;  // when found: expect v1 (true) or v2 (false)
  };
  const Scenario scenarios[] = {
      // ["aab"@100]["axy"@10], seek "axy"@60 -> second entry
      {"aab", 60, true, false},
      // ["axy"@100]["axy"@10], seek "axy"@60 -> second entry
      {"axy", 60, true, false},
      // ["axy"@100]["axy"@10], seek "axy"@120 -> first entry
      {"axy", 120, true, true},
      // ["axy"@100]["axy"@10], seek "axy"@5 -> nothing visible
      {"axy", 5, false, false},
  };

  for (const Scenario& sc : scenarios) {
    std::string uk1(sc.first_ukey);
    InternalKey ik1(uk1, 100, kTypeValue);
    std::string v1(4100, '1');

    std::string uk2("axy");
    InternalKey ik2(uk2, 10, kTypeValue);
    std::string v2(4100, '2');

    PinnableSlice value;
    std::string seek_ukey("axy");
    InternalKey seek_ikey(seek_ukey, sc.seek_seqno, kTypeValue);
    GetContext get_context(options.comparator, nullptr, nullptr, nullptr,
                           GetContext::kNotFound, seek_ukey, &value, nullptr,
                           nullptr, nullptr, true, nullptr, nullptr);

    TestBoundary(ik1, v1, ik2, v2, seek_ikey, get_context, options);

    if (sc.expect_found) {
      ASSERT_EQ(get_context.State(), GetContext::kFound);
      ASSERT_EQ(value, sc.expect_first_value ? v1 : v2);
    } else {
      ASSERT_EQ(get_context.State(), GetContext::kNotFound);
    }
    value.Reset();
  }
}
}
// Test binary entry point: installs the stack trace handler, hands the
// command line to GoogleTest, and returns the result of running all tests.
int main(int argc, char** argv) {
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  const int exit_code = RUN_ALL_TESTS();
  return exit_code;
}