#include "db/compaction/compaction_iterator.h"
#include <iterator>
#include <limits>
#include "db/blob/blob_file_builder.h"
#include "db/blob/blob_index.h"
#include "db/snapshot_checker.h"
#include "port/likely.h"
#include "rocksdb/listener.h"
#include "table/internal_iterator.h"
#include "test_util/sync_point.h"
#define DEFINITELY_IN_SNAPSHOT(seq, snapshot) \
((seq) <= (snapshot) && \
(snapshot_checker_ == nullptr || \
LIKELY(snapshot_checker_->CheckInSnapshot((seq), (snapshot)) == \
SnapshotCheckerResult::kInSnapshot)))
#define DEFINITELY_NOT_IN_SNAPSHOT(seq, snapshot) \
((seq) > (snapshot) || \
(snapshot_checker_ != nullptr && \
UNLIKELY(snapshot_checker_->CheckInSnapshot((seq), (snapshot)) == \
SnapshotCheckerResult::kNotInSnapshot)))
#define IN_EARLIEST_SNAPSHOT(seq) \
((seq) <= earliest_snapshot_ && \
(snapshot_checker_ == nullptr || LIKELY(IsInEarliestSnapshot(seq))))
namespace ROCKSDB_NAMESPACE {
CompactionIterator::CompactionIterator(
InternalIterator* input, const Comparator* cmp, MergeHelper* merge_helper,
SequenceNumber last_sequence, std::vector<SequenceNumber>* snapshots,
SequenceNumber earliest_write_conflict_snapshot,
const SnapshotChecker* snapshot_checker, Env* env,
bool report_detailed_time, bool expect_valid_internal_key,
CompactionRangeDelAggregator* range_del_agg,
BlobFileBuilder* blob_file_builder, bool allow_data_in_errors,
const Compaction* compaction, const CompactionFilter* compaction_filter,
const std::atomic<bool>* shutting_down,
const SequenceNumber preserve_deletes_seqnum,
const std::atomic<int>* manual_compaction_paused,
const std::shared_ptr<Logger> info_log,
const std::string* full_history_ts_low)
: CompactionIterator(
input, cmp, merge_helper, last_sequence, snapshots,
earliest_write_conflict_snapshot, snapshot_checker, env,
report_detailed_time, expect_valid_internal_key, range_del_agg,
blob_file_builder, allow_data_in_errors,
std::unique_ptr<CompactionProxy>(
compaction ? new RealCompaction(compaction) : nullptr),
compaction_filter, shutting_down, preserve_deletes_seqnum,
manual_compaction_paused, info_log, full_history_ts_low) {}
CompactionIterator::CompactionIterator(
InternalIterator* input, const Comparator* cmp, MergeHelper* merge_helper,
SequenceNumber , std::vector<SequenceNumber>* snapshots,
SequenceNumber earliest_write_conflict_snapshot,
const SnapshotChecker* snapshot_checker, Env* env,
bool report_detailed_time, bool expect_valid_internal_key,
CompactionRangeDelAggregator* range_del_agg,
BlobFileBuilder* blob_file_builder, bool allow_data_in_errors,
std::unique_ptr<CompactionProxy> compaction,
const CompactionFilter* compaction_filter,
const std::atomic<bool>* shutting_down,
const SequenceNumber preserve_deletes_seqnum,
const std::atomic<int>* manual_compaction_paused,
const std::shared_ptr<Logger> info_log,
const std::string* full_history_ts_low)
: input_(input),
cmp_(cmp),
merge_helper_(merge_helper),
snapshots_(snapshots),
earliest_write_conflict_snapshot_(earliest_write_conflict_snapshot),
snapshot_checker_(snapshot_checker),
env_(env),
clock_(env_->GetSystemClock().get()),
report_detailed_time_(report_detailed_time),
expect_valid_internal_key_(expect_valid_internal_key),
range_del_agg_(range_del_agg),
blob_file_builder_(blob_file_builder),
compaction_(std::move(compaction)),
compaction_filter_(compaction_filter),
shutting_down_(shutting_down),
manual_compaction_paused_(manual_compaction_paused),
preserve_deletes_seqnum_(preserve_deletes_seqnum),
info_log_(info_log),
allow_data_in_errors_(allow_data_in_errors),
timestamp_size_(cmp_ ? cmp_->timestamp_size() : 0),
full_history_ts_low_(full_history_ts_low),
current_user_key_sequence_(0),
current_user_key_snapshot_(0),
merge_out_iter_(merge_helper_),
blob_garbage_collection_cutoff_file_number_(
ComputeBlobGarbageCollectionCutoffFileNumber(compaction_.get())),
current_key_committed_(false),
cmp_with_history_ts_low_(0) {
assert(compaction_filter_ == nullptr || compaction_ != nullptr);
assert(snapshots_ != nullptr);
bottommost_level_ = compaction_ == nullptr
? false
: compaction_->bottommost_level() &&
!compaction_->allow_ingest_behind();
if (compaction_ != nullptr) {
level_ptrs_ = std::vector<size_t>(compaction_->number_levels(), 0);
}
if (snapshots_->size() == 0) {
visible_at_tip_ = true;
earliest_snapshot_iter_ = snapshots_->end();
earliest_snapshot_ = kMaxSequenceNumber;
latest_snapshot_ = 0;
} else {
visible_at_tip_ = false;
earliest_snapshot_iter_ = snapshots_->begin();
earliest_snapshot_ = snapshots_->at(0);
latest_snapshot_ = snapshots_->back();
}
#ifndef NDEBUG
for (size_t i = 1; i < snapshots_->size(); ++i) {
assert(snapshots_->at(i - 1) < snapshots_->at(i));
}
assert(timestamp_size_ == 0 || !full_history_ts_low_ ||
timestamp_size_ == full_history_ts_low_->size());
#endif
input_->SetPinnedItersMgr(&pinned_iters_mgr_);
TEST_SYNC_POINT_CALLBACK("CompactionIterator:AfterInit", compaction_.get());
}
CompactionIterator::~CompactionIterator() {
input_->SetPinnedItersMgr(nullptr);
}
void CompactionIterator::ResetRecordCounts() {
iter_stats_.num_record_drop_user = 0;
iter_stats_.num_record_drop_hidden = 0;
iter_stats_.num_record_drop_obsolete = 0;
iter_stats_.num_record_drop_range_del = 0;
iter_stats_.num_range_del_drop_obsolete = 0;
iter_stats_.num_optimized_del_drop_obsolete = 0;
}
void CompactionIterator::SeekToFirst() {
NextFromInput();
PrepareOutput();
}
void CompactionIterator::Next() {
if (merge_out_iter_.Valid()) {
merge_out_iter_.Next();
if (merge_out_iter_.Valid()) {
key_ = merge_out_iter_.key();
value_ = merge_out_iter_.value();
Status s = ParseInternalKey(key_, &ikey_, allow_data_in_errors_);
assert(s.ok());
if (!s.ok()) {
ROCKS_LOG_FATAL(info_log_, "Invalid key in compaction. %s",
s.getState());
}
current_key_.UpdateInternalKey(ikey_.sequence, ikey_.type);
key_ = current_key_.GetInternalKey();
ikey_.user_key = current_key_.GetUserKey();
valid_ = true;
} else {
pinned_iters_mgr_.ReleasePinnedData();
NextFromInput();
}
} else {
if (!at_next_) {
input_->Next();
}
NextFromInput();
}
if (valid_) {
has_outputted_key_ = true;
}
PrepareOutput();
}
bool CompactionIterator::InvokeFilterIfNeeded(bool* need_skip,
Slice* skip_until) {
if (!compaction_filter_ ||
(ikey_.type != kTypeValue && ikey_.type != kTypeBlobIndex)) {
return true;
}
bool error = false;
CompactionFilter::Decision filter = CompactionFilter::Decision::kUndetermined;
compaction_filter_value_.clear();
compaction_filter_skip_until_.Clear();
CompactionFilter::ValueType value_type =
ikey_.type == kTypeValue ? CompactionFilter::ValueType::kValue
: CompactionFilter::ValueType::kBlobIndex;
assert(compaction_filter_);
Slice& filter_key =
(ikey_.type == kTypeValue ||
!compaction_filter_->IsStackedBlobDbInternalCompactionFilter())
? ikey_.user_key
: key_;
{
StopWatchNano timer(clock_, report_detailed_time_);
if (kTypeBlobIndex == ikey_.type) {
blob_value_.Reset();
filter = compaction_filter_->FilterBlobByKey(
compaction_->level(), filter_key, &compaction_filter_value_,
compaction_filter_skip_until_.rep());
if (CompactionFilter::Decision::kUndetermined == filter &&
!compaction_filter_->IsStackedBlobDbInternalCompactionFilter()) {
BlobIndex blob_index;
Status s = blob_index.DecodeFrom(value_);
if (!s.ok()) {
status_ = s;
valid_ = false;
return false;
}
if (blob_index.HasTTL() || blob_index.IsInlined()) {
status_ = Status::Corruption("Unexpected TTL/inlined blob index");
valid_ = false;
return false;
}
const Version* const version = compaction_->input_version();
assert(version);
uint64_t bytes_read = 0;
s = version->GetBlob(ReadOptions(), ikey_.user_key, blob_index,
&blob_value_, &bytes_read);
if (!s.ok()) {
status_ = s;
valid_ = false;
return false;
}
++iter_stats_.num_blobs_read;
iter_stats_.total_blob_bytes_read += bytes_read;
value_type = CompactionFilter::ValueType::kValue;
}
}
if (CompactionFilter::Decision::kUndetermined == filter) {
filter = compaction_filter_->FilterV2(
compaction_->level(), filter_key, value_type,
blob_value_.empty() ? value_ : blob_value_, &compaction_filter_value_,
compaction_filter_skip_until_.rep());
}
iter_stats_.total_filter_time +=
env_ != nullptr && report_detailed_time_ ? timer.ElapsedNanos() : 0;
}
if (CompactionFilter::Decision::kUndetermined == filter) {
status_ =
Status::NotSupported("FilterV2() should never return kUndetermined");
valid_ = false;
return false;
}
if (filter == CompactionFilter::Decision::kRemoveAndSkipUntil &&
cmp_->Compare(*compaction_filter_skip_until_.rep(), ikey_.user_key) <=
0) {
filter = CompactionFilter::Decision::kKeep;
}
if (filter == CompactionFilter::Decision::kRemove) {
ikey_.type = kTypeDeletion;
current_key_.UpdateInternalKey(ikey_.sequence, kTypeDeletion);
value_.clear();
iter_stats_.num_record_drop_user++;
} else if (filter == CompactionFilter::Decision::kChangeValue) {
if (ikey_.type == kTypeBlobIndex) {
ikey_.type = kTypeValue;
current_key_.UpdateInternalKey(ikey_.sequence, ikey_.type);
}
value_ = compaction_filter_value_;
} else if (filter == CompactionFilter::Decision::kRemoveAndSkipUntil) {
*need_skip = true;
compaction_filter_skip_until_.ConvertFromUserKey(kMaxSequenceNumber,
kValueTypeForSeek);
*skip_until = compaction_filter_skip_until_.Encode();
} else if (filter == CompactionFilter::Decision::kChangeBlobIndex) {
if (!compaction_filter_->IsStackedBlobDbInternalCompactionFilter()) {
status_ = Status::NotSupported(
"Only stacked BlobDB's internal compaction filter can return "
"kChangeBlobIndex.");
valid_ = false;
return false;
}
if (ikey_.type == kTypeValue) {
ikey_.type = kTypeBlobIndex;
current_key_.UpdateInternalKey(ikey_.sequence, ikey_.type);
}
value_ = compaction_filter_value_;
} else if (filter == CompactionFilter::Decision::kIOError) {
if (!compaction_filter_->IsStackedBlobDbInternalCompactionFilter()) {
status_ = Status::NotSupported(
"CompactionFilter for integrated BlobDB should not return kIOError");
valid_ = false;
return false;
}
status_ = Status::IOError("Failed to access blob during compaction filter");
error = true;
}
return !error;
}
void CompactionIterator::NextFromInput() {
at_next_ = false;
valid_ = false;
while (!valid_ && input_->Valid() && !IsPausingManualCompaction() &&
!IsShuttingDown()) {
key_ = input_->key();
value_ = input_->value();
iter_stats_.num_input_records++;
Status pik_status = ParseInternalKey(key_, &ikey_, allow_data_in_errors_);
if (!pik_status.ok()) {
iter_stats_.num_input_corrupt_records++;
if (expect_valid_internal_key_) {
status_ = pik_status;
return;
}
key_ = current_key_.SetInternalKey(key_);
has_current_user_key_ = false;
current_user_key_sequence_ = kMaxSequenceNumber;
current_user_key_snapshot_ = 0;
valid_ = true;
break;
}
TEST_SYNC_POINT_CALLBACK("CompactionIterator:ProcessKV", &ikey_);
if (ikey_.type == kTypeDeletion || ikey_.type == kTypeSingleDeletion ||
ikey_.type == kTypeDeletionWithTimestamp) {
iter_stats_.num_input_deletion_records++;
}
iter_stats_.total_input_raw_key_bytes += key_.size();
iter_stats_.total_input_raw_value_bytes += value_.size();
bool need_skip = false;
Slice skip_until;
bool user_key_equal_without_ts = false;
int cmp_ts = 0;
if (has_current_user_key_) {
user_key_equal_without_ts =
cmp_->EqualWithoutTimestamp(ikey_.user_key, current_user_key_);
cmp_ts = timestamp_size_ ? cmp_->CompareTimestamp(
ExtractTimestampFromUserKey(
ikey_.user_key, timestamp_size_),
curr_ts_)
: 0;
}
if (!has_current_user_key_ || !user_key_equal_without_ts || cmp_ts != 0) {
key_ = current_key_.SetInternalKey(key_, &ikey_);
UpdateTimestampAndCompareWithFullHistoryLow();
if (!has_current_user_key_ || !timestamp_size_ || !full_history_ts_low_ ||
!user_key_equal_without_ts || cmp_with_history_ts_low_ >= 0) {
current_user_key_sequence_ = kMaxSequenceNumber;
current_user_key_snapshot_ = 0;
has_current_user_key_ = true;
}
current_user_key_ = ikey_.user_key;
has_outputted_key_ = false;
current_key_committed_ = KeyCommitted(ikey_.sequence);
if (current_key_committed_ &&
!InvokeFilterIfNeeded(&need_skip, &skip_until)) {
break;
}
} else {
current_key_.UpdateInternalKey(ikey_.sequence, ikey_.type);
key_ = current_key_.GetInternalKey();
ikey_.user_key = current_key_.GetUserKey();
if (UNLIKELY(!current_key_committed_)) {
assert(snapshot_checker_ != nullptr);
current_key_committed_ = KeyCommitted(ikey_.sequence);
if (current_key_committed_ &&
!InvokeFilterIfNeeded(&need_skip, &skip_until)) {
break;
}
}
}
if (UNLIKELY(!current_key_committed_)) {
assert(snapshot_checker_ != nullptr);
valid_ = true;
break;
}
SequenceNumber last_sequence = current_user_key_sequence_;
current_user_key_sequence_ = ikey_.sequence;
SequenceNumber last_snapshot = current_user_key_snapshot_;
SequenceNumber prev_snapshot = 0; current_user_key_snapshot_ =
visible_at_tip_
? earliest_snapshot_
: findEarliestVisibleSnapshot(ikey_.sequence, &prev_snapshot);
if (need_skip) {
} else if (clear_and_output_next_key_) {
assert(ikey_.type == kTypeValue || ikey_.type == kTypeBlobIndex);
if (ikey_.type != kTypeValue && ikey_.type != kTypeBlobIndex) {
ROCKS_LOG_FATAL(info_log_,
"Unexpected key type %d for compaction output",
ikey_.type);
}
assert(current_user_key_snapshot_ == last_snapshot);
if (current_user_key_snapshot_ != last_snapshot) {
ROCKS_LOG_FATAL(info_log_,
"current_user_key_snapshot_ (%" PRIu64
") != last_snapshot (%" PRIu64 ")",
current_user_key_snapshot_, last_snapshot);
}
if (ikey_.type == kTypeBlobIndex) {
ikey_.type = kTypeValue;
current_key_.UpdateInternalKey(ikey_.sequence, ikey_.type);
}
value_.clear();
valid_ = true;
clear_and_output_next_key_ = false;
} else if (ikey_.type == kTypeSingleDeletion) {
ParsedInternalKey next_ikey;
input_->Next();
if (input_->Valid() &&
ParseInternalKey(input_->key(), &next_ikey, allow_data_in_errors_)
.ok() &&
cmp_->Equal(ikey_.user_key, next_ikey.user_key)) {
if (prev_snapshot == 0 ||
DEFINITELY_NOT_IN_SNAPSHOT(next_ikey.sequence, prev_snapshot)) {
if (next_ikey.type == kTypeSingleDeletion) {
++iter_stats_.num_record_drop_obsolete;
++iter_stats_.num_single_del_mismatch;
} else if (has_outputted_key_ ||
DEFINITELY_IN_SNAPSHOT(
ikey_.sequence, earliest_write_conflict_snapshot_)) {
if (next_ikey.type != kTypeValue &&
next_ikey.type != kTypeBlobIndex) {
++iter_stats_.num_single_del_mismatch;
}
++iter_stats_.num_record_drop_hidden;
++iter_stats_.num_record_drop_obsolete;
input_->Next();
} else {
valid_ = true;
clear_and_output_next_key_ = true;
}
} else {
valid_ = true;
}
} else {
has_current_user_key_ = false;
if (compaction_ != nullptr && IN_EARLIEST_SNAPSHOT(ikey_.sequence) &&
compaction_->KeyNotExistsBeyondOutputLevel(ikey_.user_key,
&level_ptrs_)) {
++iter_stats_.num_record_drop_obsolete;
++iter_stats_.num_single_del_fallthru;
if (!bottommost_level_) {
++iter_stats_.num_optimized_del_drop_obsolete;
}
} else {
valid_ = true;
}
}
if (valid_) {
at_next_ = true;
}
} else if (last_snapshot == current_user_key_snapshot_ ||
(last_snapshot > 0 &&
last_snapshot < current_user_key_snapshot_)) {
assert(last_sequence >= current_user_key_sequence_);
if (last_sequence < current_user_key_sequence_) {
ROCKS_LOG_FATAL(info_log_,
"last_sequence (%" PRIu64
") < current_user_key_sequence_ (%" PRIu64 ")",
last_sequence, current_user_key_sequence_);
}
++iter_stats_.num_record_drop_hidden; input_->Next();
} else if (compaction_ != nullptr &&
(ikey_.type == kTypeDeletion ||
(ikey_.type == kTypeDeletionWithTimestamp &&
cmp_with_history_ts_low_ < 0)) &&
IN_EARLIEST_SNAPSHOT(ikey_.sequence) &&
ikeyNotNeededForIncrementalSnapshot() &&
compaction_->KeyNotExistsBeyondOutputLevel(ikey_.user_key,
&level_ptrs_)) {
++iter_stats_.num_record_drop_obsolete;
if (!bottommost_level_) {
++iter_stats_.num_optimized_del_drop_obsolete;
}
input_->Next();
} else if ((ikey_.type == kTypeDeletion ||
(ikey_.type == kTypeDeletionWithTimestamp &&
cmp_with_history_ts_low_ < 0)) &&
bottommost_level_ && ikeyNotNeededForIncrementalSnapshot()) {
assert(!compaction_ || compaction_->KeyNotExistsBeyondOutputLevel(
ikey_.user_key, &level_ptrs_));
ParsedInternalKey next_ikey;
input_->Next();
while (!IsPausingManualCompaction() && !IsShuttingDown() &&
input_->Valid() &&
(ParseInternalKey(input_->key(), &next_ikey, allow_data_in_errors_)
.ok()) &&
cmp_->EqualWithoutTimestamp(ikey_.user_key, next_ikey.user_key) &&
(prev_snapshot == 0 ||
DEFINITELY_NOT_IN_SNAPSHOT(next_ikey.sequence, prev_snapshot))) {
input_->Next();
}
if (input_->Valid() &&
(ParseInternalKey(input_->key(), &next_ikey, allow_data_in_errors_)
.ok()) &&
cmp_->EqualWithoutTimestamp(ikey_.user_key, next_ikey.user_key)) {
valid_ = true;
at_next_ = true;
}
} else if (ikey_.type == kTypeMerge) {
if (!merge_helper_->HasOperator()) {
status_ = Status::InvalidArgument(
"merge_operator is not properly initialized.");
return;
}
pinned_iters_mgr_.StartPinning();
Status s =
merge_helper_->MergeUntil(input_, range_del_agg_, prev_snapshot,
bottommost_level_, allow_data_in_errors_);
merge_out_iter_.SeekToFirst();
if (!s.ok() && !s.IsMergeInProgress()) {
status_ = s;
return;
} else if (merge_out_iter_.Valid()) {
key_ = merge_out_iter_.key();
value_ = merge_out_iter_.value();
pik_status = ParseInternalKey(key_, &ikey_, allow_data_in_errors_);
assert(pik_status.ok());
if (!pik_status.ok()) {
ROCKS_LOG_FATAL(info_log_, "Invalid key in compaction. %s",
pik_status.getState());
}
current_key_.UpdateInternalKey(ikey_.sequence, ikey_.type);
key_ = current_key_.GetInternalKey();
ikey_.user_key = current_key_.GetUserKey();
valid_ = true;
} else {
has_current_user_key_ = false;
pinned_iters_mgr_.ReleasePinnedData();
if (merge_helper_->FilteredUntil(&skip_until)) {
need_skip = true;
}
}
} else {
bool should_delete = range_del_agg_->ShouldDelete(
key_, RangeDelPositioningMode::kForwardTraversal);
if (should_delete) {
++iter_stats_.num_record_drop_hidden;
++iter_stats_.num_record_drop_range_del;
input_->Next();
} else {
valid_ = true;
}
}
if (need_skip) {
input_->Seek(skip_until);
}
}
if (!valid_ && IsShuttingDown()) {
status_ = Status::ShutdownInProgress();
}
if (IsPausingManualCompaction()) {
status_ = Status::Incomplete(Status::SubCode::kManualCompactionPaused);
}
}
bool CompactionIterator::ExtractLargeValueIfNeededImpl() {
if (!blob_file_builder_) {
return false;
}
blob_index_.clear();
const Status s = blob_file_builder_->Add(user_key(), value_, &blob_index_);
if (!s.ok()) {
status_ = s;
valid_ = false;
return false;
}
if (blob_index_.empty()) {
return false;
}
value_ = blob_index_;
return true;
}
void CompactionIterator::ExtractLargeValueIfNeeded() {
assert(ikey_.type == kTypeValue);
if (!ExtractLargeValueIfNeededImpl()) {
return;
}
ikey_.type = kTypeBlobIndex;
current_key_.UpdateInternalKey(ikey_.sequence, ikey_.type);
}
void CompactionIterator::GarbageCollectBlobIfNeeded() {
assert(ikey_.type == kTypeBlobIndex);
if (!compaction_) {
return;
}
if (compaction_->enable_blob_garbage_collection()) {
BlobIndex blob_index;
{
const Status s = blob_index.DecodeFrom(value_);
if (!s.ok()) {
status_ = s;
valid_ = false;
return;
}
}
if (blob_index.IsInlined() || blob_index.HasTTL()) {
status_ = Status::Corruption("Unexpected TTL/inlined blob index");
valid_ = false;
return;
}
if (blob_index.file_number() >=
blob_garbage_collection_cutoff_file_number_) {
return;
}
const Version* const version = compaction_->input_version();
assert(version);
uint64_t bytes_read = 0;
{
const Status s = version->GetBlob(ReadOptions(), user_key(), blob_index,
&blob_value_, &bytes_read);
if (!s.ok()) {
status_ = s;
valid_ = false;
return;
}
}
++iter_stats_.num_blobs_read;
iter_stats_.total_blob_bytes_read += bytes_read;
value_ = blob_value_;
if (ExtractLargeValueIfNeededImpl()) {
return;
}
ikey_.type = kTypeValue;
current_key_.UpdateInternalKey(ikey_.sequence, ikey_.type);
return;
}
if (compaction_filter_ &&
compaction_filter_->IsStackedBlobDbInternalCompactionFilter()) {
const auto blob_decision = compaction_filter_->PrepareBlobOutput(
user_key(), value_, &compaction_filter_value_);
if (blob_decision == CompactionFilter::BlobDecision::kCorruption) {
status_ =
Status::Corruption("Corrupted blob reference encountered during GC");
valid_ = false;
return;
}
if (blob_decision == CompactionFilter::BlobDecision::kIOError) {
status_ = Status::IOError("Could not relocate blob during GC");
valid_ = false;
return;
}
if (blob_decision == CompactionFilter::BlobDecision::kChangeValue) {
value_ = compaction_filter_value_;
return;
}
}
}
void CompactionIterator::PrepareOutput() {
if (valid_) {
if (ikey_.type == kTypeValue) {
ExtractLargeValueIfNeeded();
} else if (ikey_.type == kTypeBlobIndex) {
GarbageCollectBlobIfNeeded();
}
if (valid_ && compaction_ != nullptr &&
!compaction_->allow_ingest_behind() &&
ikeyNotNeededForIncrementalSnapshot() && bottommost_level_ &&
IN_EARLIEST_SNAPSHOT(ikey_.sequence) && ikey_.type != kTypeMerge) {
assert(ikey_.type != kTypeDeletion && ikey_.type != kTypeSingleDeletion);
if (ikey_.type == kTypeDeletion || ikey_.type == kTypeSingleDeletion) {
ROCKS_LOG_FATAL(info_log_,
"Unexpected key type %d for seq-zero optimization",
ikey_.type);
}
ikey_.sequence = 0;
if (!timestamp_size_) {
current_key_.UpdateInternalKey(0, ikey_.type);
} else if (full_history_ts_low_ && cmp_with_history_ts_low_ < 0) {
const std::string kTsMin(timestamp_size_, static_cast<char>(0));
const Slice ts_slice = kTsMin;
ikey_.SetTimestamp(ts_slice);
current_key_.UpdateInternalKey(0, ikey_.type, &ts_slice);
}
}
}
}
inline SequenceNumber CompactionIterator::findEarliestVisibleSnapshot(
SequenceNumber in, SequenceNumber* prev_snapshot) {
assert(snapshots_->size());
if (snapshots_->size() == 0) {
ROCKS_LOG_FATAL(info_log_,
"No snapshot left in findEarliestVisibleSnapshot");
}
auto snapshots_iter = std::lower_bound(
snapshots_->begin(), snapshots_->end(), in);
if (snapshots_iter == snapshots_->begin()) {
*prev_snapshot = 0;
} else {
*prev_snapshot = *std::prev(snapshots_iter);
assert(*prev_snapshot < in);
if (*prev_snapshot >= in) {
ROCKS_LOG_FATAL(info_log_,
"*prev_snapshot >= in in findEarliestVisibleSnapshot");
}
}
if (snapshot_checker_ == nullptr) {
return snapshots_iter != snapshots_->end()
? *snapshots_iter : kMaxSequenceNumber;
}
bool has_released_snapshot = !released_snapshots_.empty();
for (; snapshots_iter != snapshots_->end(); ++snapshots_iter) {
auto cur = *snapshots_iter;
assert(in <= cur);
if (in > cur) {
ROCKS_LOG_FATAL(info_log_, "in > cur in findEarliestVisibleSnapshot");
}
if (has_released_snapshot && released_snapshots_.count(cur) > 0) {
continue;
}
auto res = snapshot_checker_->CheckInSnapshot(in, cur);
if (res == SnapshotCheckerResult::kInSnapshot) {
return cur;
} else if (res == SnapshotCheckerResult::kSnapshotReleased) {
released_snapshots_.insert(cur);
}
*prev_snapshot = cur;
}
return kMaxSequenceNumber;
}
inline bool CompactionIterator::ikeyNotNeededForIncrementalSnapshot() {
return (!compaction_->preserve_deletes()) ||
(ikey_.sequence < preserve_deletes_seqnum_);
}
bool CompactionIterator::IsInEarliestSnapshot(SequenceNumber sequence) {
assert(snapshot_checker_ != nullptr);
bool pre_condition = (earliest_snapshot_ == kMaxSequenceNumber ||
(earliest_snapshot_iter_ != snapshots_->end() &&
*earliest_snapshot_iter_ == earliest_snapshot_));
assert(pre_condition);
if (!pre_condition) {
ROCKS_LOG_FATAL(info_log_,
"Pre-Condition is not hold in IsInEarliestSnapshot");
}
auto in_snapshot =
snapshot_checker_->CheckInSnapshot(sequence, earliest_snapshot_);
while (UNLIKELY(in_snapshot == SnapshotCheckerResult::kSnapshotReleased)) {
released_snapshots_.insert(earliest_snapshot_);
earliest_snapshot_iter_++;
if (earliest_snapshot_iter_ == snapshots_->end()) {
earliest_snapshot_ = kMaxSequenceNumber;
} else {
earliest_snapshot_ = *earliest_snapshot_iter_;
}
in_snapshot =
snapshot_checker_->CheckInSnapshot(sequence, earliest_snapshot_);
}
assert(in_snapshot != SnapshotCheckerResult::kSnapshotReleased);
if (in_snapshot == SnapshotCheckerResult::kSnapshotReleased) {
ROCKS_LOG_FATAL(info_log_,
"Unexpected released snapshot in IsInEarliestSnapshot");
}
return in_snapshot == SnapshotCheckerResult::kInSnapshot;
}
uint64_t CompactionIterator::ComputeBlobGarbageCollectionCutoffFileNumber(
const CompactionProxy* compaction) {
if (!compaction) {
return 0;
}
if (!compaction->enable_blob_garbage_collection()) {
return 0;
}
Version* const version = compaction->input_version();
assert(version);
const VersionStorageInfo* const storage_info = version->storage_info();
assert(storage_info);
const auto& blob_files = storage_info->GetBlobFiles();
auto it = blob_files.begin();
std::advance(
it, compaction->blob_garbage_collection_age_cutoff() * blob_files.size());
return it != blob_files.end() ? it->first
: std::numeric_limits<uint64_t>::max();
}
}