#ifndef CPC_COMPRESSOR_IMPL_HPP_
#define CPC_COMPRESSOR_IMPL_HPP_
#include <memory>
#include "compression_data.hpp"
#include "cpc_util.hpp"
#include "cpc_common.hpp"
#include "count_zeros.hpp"
namespace datasketches {
template<typename A>
cpc_compressor<A>& get_compressor() {
static cpc_compressor<A>* instance = new cpc_compressor<A>(); return *instance;
}
template<typename A>
cpc_compressor<A>::cpc_compressor() {
make_decoding_tables();
}
template<typename A>
cpc_compressor<A>::~cpc_compressor() {
free_decoding_tables();
}
template<typename A>
uint8_t* cpc_compressor<A>::make_inverse_permutation(const uint8_t* permu, unsigned length) {
uint8_t* inverse = new uint8_t[length]; for (unsigned i = 0; i < length; i++) {
inverse[permu[i]] = static_cast<uint8_t>(i);
}
for (unsigned i = 0; i < length; i++) {
if (permu[inverse[i]] != i) throw std::logic_error("inverse permutation error");
}
return inverse;
}
template<typename A>
uint16_t* cpc_compressor<A>::make_decoding_table(const uint16_t* encoding_table, unsigned num_byte_values) {
uint16_t* decoding_table = new uint16_t[4096]; for (unsigned byte_value = 0; byte_value < num_byte_values; byte_value++) {
const uint16_t encoding_entry = encoding_table[byte_value];
const uint16_t code_value = encoding_entry & 0xfff;
const uint8_t code_length = encoding_entry >> 12;
const uint16_t decoding_entry = static_cast<uint16_t>((code_length << 8) | byte_value);
const uint8_t garbage_length = 12 - code_length;
const uint32_t num_copies = 1 << garbage_length;
for (uint32_t garbage_bits = 0; garbage_bits < num_copies; garbage_bits++) {
const uint16_t extended_code_value = static_cast<uint16_t>(code_value | (garbage_bits << code_length));
decoding_table[extended_code_value & 0xfff] = decoding_entry;
}
}
return decoding_table;
}
template<typename A>
void cpc_compressor<A>::validate_decoding_table(const uint16_t* decoding_table, const uint16_t* encoding_table) const {
for (int decode_this = 0; decode_this < 4096; decode_this++) {
const int tmp_d = decoding_table[decode_this];
const int decoded_byte = tmp_d & 0xff;
const int decoded_length = tmp_d >> 8;
const int tmp_e = encoding_table[decoded_byte];
const int encoded_bit_pattern = tmp_e & 0xfff;
const int encoded_length = tmp_e >> 12;
if (decoded_length != encoded_length) throw std::logic_error("decoded length error");
if (encoded_bit_pattern != (decode_this & ((1 << decoded_length) - 1))) throw std::logic_error("bit pattern error");
}
}
template<typename A>
void cpc_compressor<A>::make_decoding_tables() {
length_limited_unary_decoding_table65 = make_decoding_table(length_limited_unary_encoding_table65, 65);
validate_decoding_table(
length_limited_unary_decoding_table65,
length_limited_unary_encoding_table65
);
for (int i = 0; i < (16 + 6); i++) {
decoding_tables_for_high_entropy_byte[i] = make_decoding_table(encoding_tables_for_high_entropy_byte[i], 256);
validate_decoding_table(
decoding_tables_for_high_entropy_byte[i],
encoding_tables_for_high_entropy_byte[i]
);
}
for (int i = 0; i < 16; i++) {
column_permutations_for_decoding[i] = make_inverse_permutation(column_permutations_for_encoding[i], 56);
}
}
template<typename A>
void cpc_compressor<A>::free_decoding_tables() {
delete[] length_limited_unary_decoding_table65;
for (int i = 0; i < (16 + 6); i++) {
delete[] decoding_tables_for_high_entropy_byte[i];
}
for (int i = 0; i < 16; i++) {
delete[] column_permutations_for_decoding[i];
}
}
template<typename A>
void cpc_compressor<A>::compress(const cpc_sketch_alloc<A>& source, compressed_state<A>& result) const {
switch (source.determine_flavor()) {
case cpc_sketch_alloc<A>::flavor::EMPTY:
break;
case cpc_sketch_alloc<A>::flavor::SPARSE:
compress_sparse_flavor(source, result);
if (result.window_data.size() > 0) throw std::logic_error("window is not expected");
if (result.table_data.size() == 0) throw std::logic_error("table is expected");
break;
case cpc_sketch_alloc<A>::flavor::HYBRID:
compress_hybrid_flavor(source, result);
if (result.window_data.size() > 0) throw std::logic_error("window is not expected");
if (result.table_data.size() == 0) throw std::logic_error("table is expected");
break;
case cpc_sketch_alloc<A>::flavor::PINNED:
compress_pinned_flavor(source, result);
if (result.window_data.size() == 0) throw std::logic_error("window is not expected");
break;
case cpc_sketch_alloc<A>::flavor::SLIDING:
compress_sliding_flavor(source, result);
if (result.window_data.size() == 0) throw std::logic_error("window is expected");
break;
default: throw std::logic_error("Unknown sketch flavor");
}
}
template<typename A>
void cpc_compressor<A>::uncompress(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k, uint32_t num_coupons) const {
switch (cpc_sketch_alloc<A>::determine_flavor(lg_k, num_coupons)) {
case cpc_sketch_alloc<A>::flavor::EMPTY:
target.table = u32_table<A>(2, 6 + lg_k, source.table_data.get_allocator());
break;
case cpc_sketch_alloc<A>::flavor::SPARSE:
uncompress_sparse_flavor(source, target, lg_k);
break;
case cpc_sketch_alloc<A>::flavor::HYBRID:
uncompress_hybrid_flavor(source, target, lg_k);
break;
case cpc_sketch_alloc<A>::flavor::PINNED:
if (source.window_data.size() == 0) throw std::logic_error("window is expected");
uncompress_pinned_flavor(source, target, lg_k, num_coupons);
break;
case cpc_sketch_alloc<A>::flavor::SLIDING:
uncompress_sliding_flavor(source, target, lg_k, num_coupons);
break;
default: std::logic_error("Unknown sketch flavor");
}
}
template<typename A>
void cpc_compressor<A>::compress_sparse_flavor(const cpc_sketch_alloc<A>& source, compressed_state<A>& result) const {
if (source.sliding_window.size() > 0) throw std::logic_error("unexpected sliding window");
vector_u32<A> pairs = source.surprising_value_table.unwrapping_get_items();
u32_table<A>::introspective_insertion_sort(pairs.data(), 0, pairs.size());
compress_surprising_values(pairs, source.get_lg_k(), result);
}
template<typename A>
void cpc_compressor<A>::uncompress_sparse_flavor(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k) const {
if (source.window_data.size() > 0) throw std::logic_error("unexpected sliding window");
if (source.table_data.size() == 0) throw std::logic_error("table is expected");
vector_u32<A> pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, source.table_num_entries,
lg_k, source.table_data.get_allocator());
target.table = u32_table<A>::make_from_pairs(pairs.data(), source.table_num_entries, lg_k, pairs.get_allocator());
}
template<typename A>
void cpc_compressor<A>::compress_hybrid_flavor(const cpc_sketch_alloc<A>& source, compressed_state<A>& result) const {
if (source.sliding_window.size() == 0) throw std::logic_error("no sliding window");
if (source.window_offset != 0) throw std::logic_error("window_offset != 0");
const uint32_t k = 1 << source.get_lg_k();
vector_u32<A> pairs_from_table = source.surprising_value_table.unwrapping_get_items();
const uint32_t num_pairs_from_table = static_cast<uint32_t>(pairs_from_table.size());
if (num_pairs_from_table > 0) u32_table<A>::introspective_insertion_sort(pairs_from_table.data(), 0, num_pairs_from_table);
const uint32_t num_pairs_from_window = source.get_num_coupons() - num_pairs_from_table;
vector_u32<A> all_pairs = tricky_get_pairs_from_window(source.sliding_window.data(), k, num_pairs_from_window, num_pairs_from_table, source.get_allocator());
u32_table<A>::merge(
pairs_from_table.data(), 0, pairs_from_table.size(),
all_pairs.data(), num_pairs_from_table, num_pairs_from_window,
all_pairs.data(), 0
);
compress_surprising_values(all_pairs, source.get_lg_k(), result);
}
template<typename A>
void cpc_compressor<A>::uncompress_hybrid_flavor(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k) const {
if (source.window_data.size() > 0) throw std::logic_error("window is not expected");
if (source.table_data.size() == 0) throw std::logic_error("table is expected");
vector_u32<A> pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, source.table_num_entries,
lg_k, source.table_data.get_allocator());
const uint32_t k = 1 << lg_k;
target.window.resize(k, 0); uint32_t next_true_pair = 0;
for (uint32_t i = 0; i < source.table_num_entries; i++) {
const uint32_t row_col = pairs[i];
if (row_col == UINT32_MAX) throw std::logic_error("empty marker is not expected");
const uint8_t col = row_col & 63;
if (col < 8) {
const uint32_t row = row_col >> 6;
target.window[row] |= 1 << col; } else {
pairs[next_true_pair++] = row_col; }
}
target.table = u32_table<A>::make_from_pairs(pairs.data(), next_true_pair, lg_k, pairs.get_allocator());
}
template<typename A>
void cpc_compressor<A>::compress_pinned_flavor(const cpc_sketch_alloc<A>& source, compressed_state<A>& result) const {
compress_sliding_window(source.sliding_window.data(), source.get_lg_k(), source.get_num_coupons(), result);
vector_u32<A> pairs = source.surprising_value_table.unwrapping_get_items();
if (pairs.size() > 0) {
for (size_t i = 0; i < pairs.size(); i++) {
if ((pairs[i] & 63) < 8) throw std::logic_error("(pairs[i] & 63) < 8");
pairs[i] -= 8;
}
if (pairs.size() > 0) u32_table<A>::introspective_insertion_sort(pairs.data(), 0, pairs.size());
compress_surprising_values(pairs, source.get_lg_k(), result);
}
}
template<typename A>
void cpc_compressor<A>::uncompress_pinned_flavor(const compressed_state<A>& source, uncompressed_state<A>& target,
uint8_t lg_k, uint32_t num_coupons) const {
if (source.window_data.size() == 0) throw std::logic_error("window is expected");
uncompress_sliding_window(source.window_data.data(), source.window_data_words, target.window, lg_k, num_coupons);
const uint32_t num_pairs = source.table_num_entries;
if (num_pairs == 0) {
target.table = u32_table<A>(2, 6 + lg_k, source.table_data.get_allocator());
} else {
if (source.table_data.size() == 0) throw std::logic_error("table is expected");
vector_u32<A> pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, num_pairs,
lg_k, source.table_data.get_allocator());
for (uint32_t i = 0; i < num_pairs; i++) {
if ((pairs[i] & 63) >= 56) throw std::logic_error("(pairs[i] & 63) >= 56");
pairs[i] += 8;
}
target.table = u32_table<A>::make_from_pairs(pairs.data(), num_pairs, lg_k, pairs.get_allocator());
}
}
template<typename A>
void cpc_compressor<A>::compress_sliding_flavor(const cpc_sketch_alloc<A>& source, compressed_state<A>& result) const {
compress_sliding_window(source.sliding_window.data(), source.get_lg_k(), source.get_num_coupons(), result);
vector_u32<A> pairs = source.surprising_value_table.unwrapping_get_items();
if (pairs.size() > 0) {
const uint8_t pseudo_phase = determine_pseudo_phase(source.get_lg_k(), source.get_num_coupons());
const uint8_t* permutation = column_permutations_for_encoding[pseudo_phase];
const uint8_t offset = source.window_offset;
if (offset > 56) throw std::out_of_range("offset out of range");
for (size_t i = 0; i < pairs.size(); i++) {
const uint32_t row_col = pairs[i];
const uint32_t row = row_col >> 6;
uint8_t col = row_col & 63;
col = (col + 56 - offset) & 63;
if (col >= 56) throw std::out_of_range("col out of range");
col = permutation[col];
pairs[i] = (row << 6) | col;
}
if (pairs.size() > 0) u32_table<A>::introspective_insertion_sort(pairs.data(), 0, pairs.size());
compress_surprising_values(pairs, source.get_lg_k(), result);
}
}
template<typename A>
void cpc_compressor<A>::uncompress_sliding_flavor(const compressed_state<A>& source, uncompressed_state<A>& target,
uint8_t lg_k, uint32_t num_coupons) const {
if (source.window_data.size() == 0) throw std::logic_error("window is expected");
uncompress_sliding_window(source.window_data.data(), source.window_data_words, target.window, lg_k, num_coupons);
const uint32_t num_pairs = source.table_num_entries;
if (num_pairs == 0) {
target.table = u32_table<A>(2, 6 + lg_k, source.table_data.get_allocator());
} else {
if (source.table_data.size() == 0) throw std::logic_error("table is expected");
vector_u32<A> pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, num_pairs,
lg_k, source.table_data.get_allocator());
const uint8_t pseudo_phase = determine_pseudo_phase(lg_k, num_coupons);
if (pseudo_phase >= 16) throw std::logic_error("pseudo phase >= 16");
const uint8_t* permutation = column_permutations_for_decoding[pseudo_phase];
uint8_t offset = cpc_sketch_alloc<A>::determine_correct_offset(lg_k, num_coupons);
if (offset > 56) throw std::out_of_range("offset out of range");
for (uint32_t i = 0; i < num_pairs; i++) {
const uint32_t row_col = pairs[i];
const uint32_t row = row_col >> 6;
uint8_t col = row_col & 63;
col = permutation[col];
col = (col + (offset + 8)) & 63;
pairs[i] = (row << 6) | col;
}
target.table = u32_table<A>::make_from_pairs(pairs.data(), num_pairs, lg_k, pairs.get_allocator());
}
}
template<typename A>
void cpc_compressor<A>::compress_surprising_values(const vector_u32<A>& pairs, uint8_t lg_k, compressed_state<A>& result) const {
const uint32_t k = 1 << lg_k;
const uint32_t num_pairs = static_cast<uint32_t>(pairs.size());
const uint8_t num_base_bits = golomb_choose_number_of_base_bits(k + num_pairs, num_pairs);
const uint64_t table_len = safe_length_for_compressed_pair_buf(k, num_pairs, num_base_bits);
result.table_data.resize(table_len);
uint32_t csv_length = low_level_compress_pairs(pairs.data(), static_cast<uint32_t>(pairs.size()), num_base_bits, result.table_data.data());
result.table_data_words = csv_length;
result.table_num_entries = num_pairs;
}
template<typename A>
vector_u32<A> cpc_compressor<A>::uncompress_surprising_values(const uint32_t* data, uint32_t data_words, uint32_t num_pairs,
uint8_t lg_k, const A& allocator) const {
const uint32_t k = 1 << lg_k;
vector_u32<A> pairs(num_pairs, 0, allocator);
const uint8_t num_base_bits = golomb_choose_number_of_base_bits(k + num_pairs, num_pairs);
low_level_uncompress_pairs(pairs.data(), num_pairs, num_base_bits, data, data_words);
return pairs;
}
template<typename A>
void cpc_compressor<A>::compress_sliding_window(const uint8_t* window, uint8_t lg_k, uint32_t num_coupons, compressed_state<A>& target) const {
const uint32_t k = 1 << lg_k;
const size_t window_buf_len = safe_length_for_compressed_window_buf(k);
target.window_data.resize(window_buf_len);
const uint8_t pseudo_phase = determine_pseudo_phase(lg_k, num_coupons);
size_t data_words = low_level_compress_bytes(window, k, encoding_tables_for_high_entropy_byte[pseudo_phase], target.window_data.data());
target.window_data_words = static_cast<uint32_t>(data_words);
}
template<typename A>
void cpc_compressor<A>::uncompress_sliding_window(const uint32_t* data, uint32_t data_words, vector_u8<A>& window,
uint8_t lg_k, uint32_t num_coupons) const {
const uint32_t k = 1 << lg_k;
window.resize(k); const uint8_t pseudo_phase = determine_pseudo_phase(lg_k, num_coupons);
low_level_uncompress_bytes(window.data(), k, decoding_tables_for_high_entropy_byte[pseudo_phase], data, data_words);
}
template<typename A>
size_t cpc_compressor<A>::safe_length_for_compressed_pair_buf(uint32_t k, uint32_t num_pairs, uint8_t num_base_bits) {
const size_t ybits = num_pairs * (1 + num_base_bits) + (k >> num_base_bits);
const size_t xbits = 12 * num_pairs;
const size_t padding = num_base_bits > 10 ? 0 : 10 - num_base_bits;
return divide_longs_rounding_up(xbits + ybits + padding, 32);
}
template<typename A>
size_t cpc_compressor<A>::safe_length_for_compressed_window_buf(uint32_t k) { const size_t bits = 12 * k + 11; return divide_longs_rounding_up(bits, 32);
}
template<typename A>
uint8_t cpc_compressor<A>::determine_pseudo_phase(uint8_t lg_k, uint32_t c) {
const uint32_t k = 1 << lg_k;
if (1000 * c < 2375 * k) {
if ( 4 * c < 3 * k) return 16 + 0; else if ( 10 * c < 11 * k) return 16 + 1; else if ( 100 * c < 132 * k) return 16 + 2; else if ( 3 * c < 5 * k) return 16 + 3; else if (1000 * c < 1965 * k) return 16 + 4; else if (1000 * c < 2275 * k) return 16 + 5; else return 6; } else { if (lg_k < 4) throw std::logic_error("lgK < 4");
const size_t tmp = c >> (lg_k - 4);
const uint8_t phase = tmp & 15;
if (phase < 0 || phase >= 16) throw std::out_of_range("wrong phase");
return phase;
}
}
static inline void maybe_flush_bitbuf(uint64_t& bitbuf, uint8_t& bufbits, uint32_t* wordarr, uint32_t& wordindex) {
if (bufbits >= 32) {
wordarr[wordindex++] = bitbuf & 0xffffffff;
bitbuf = bitbuf >> 32;
bufbits -= 32;
}
}
static inline void maybe_fill_bitbuf(uint64_t& bitbuf, uint8_t& bufbits, const uint32_t* wordarr, uint32_t& wordindex, uint8_t minbits) {
if (bufbits < minbits) {
bitbuf |= static_cast<uint64_t>(wordarr[wordindex++]) << bufbits;
bufbits += 32;
}
}
template<typename A>
uint32_t cpc_compressor<A>::low_level_compress_bytes(
const uint8_t* byte_array, uint32_t num_bytes_to_encode,
const uint16_t* encoding_table,
uint32_t* compressed_words ) const {
uint64_t bitbuf = 0; uint8_t bufbits = 0; uint32_t next_word_index = 0;
for (uint32_t byte_index = 0; byte_index < num_bytes_to_encode; byte_index++) {
const uint16_t code_info = encoding_table[byte_array[byte_index]];
const uint64_t code_val = code_info & 0xfff;
const uint8_t code_len = code_info >> 12;
bitbuf |= (code_val << bufbits);
bufbits += code_len;
maybe_flush_bitbuf(bitbuf, bufbits, compressed_words, next_word_index);
}
bufbits += 11;
maybe_flush_bitbuf(bitbuf, bufbits, compressed_words, next_word_index);
if (bufbits > 0) { if (bufbits >= 32) throw std::logic_error("bufbits >= 32");
compressed_words[next_word_index++] = bitbuf & 0xffffffff;
bitbuf = 0; bufbits = 0; }
return next_word_index;
}
template<typename A>
void cpc_compressor<A>::low_level_uncompress_bytes(
uint8_t* byte_array, uint32_t num_bytes_to_decode,
const uint16_t* decoding_table,
const uint32_t* compressed_words, uint32_t num_compressed_words
) const {
uint32_t word_index = 0;
uint64_t bitbuf = 0;
uint8_t bufbits = 0;
if (byte_array == nullptr) throw std::logic_error("byte_array == NULL");
if (decoding_table == nullptr) throw std::logic_error("decoding_table == NULL");
if (compressed_words == nullptr) throw std::logic_error("compressed_words == NULL");
for (uint32_t byte_index = 0; byte_index < num_bytes_to_decode; byte_index++) {
maybe_fill_bitbuf(bitbuf, bufbits, compressed_words, word_index, 12);
const size_t peek12 = bitbuf & 0xfff; const uint16_t lookup = decoding_table[peek12];
const uint8_t code_word_length = lookup >> 8;
const uint8_t decoded_byte = lookup & 0xff;
byte_array[byte_index] = decoded_byte;
bitbuf >>= code_word_length;
bufbits -= code_word_length;
}
if (word_index > num_compressed_words) throw std::logic_error("word_index > num_compressed_words");
}
static inline uint64_t read_unary(
const uint32_t* compressed_words,
uint32_t& next_word_index,
uint64_t& bitbuf,
uint8_t& bufbits
);
static inline void write_unary(
uint32_t* compressed_words,
uint32_t& next_word_index_ptr,
uint64_t& bit_buf_ptr,
uint8_t& buf_bits_ptr,
uint64_t value
);
template<typename A>
uint32_t cpc_compressor<A>::low_level_compress_pairs(
const uint32_t* pair_array, uint32_t num_pairs_to_encode,
uint8_t num_base_bits,
uint32_t* compressed_words ) const {
uint64_t bitbuf = 0;
uint8_t bufbits = 0;
uint32_t next_word_index = 0;
const uint64_t golomb_lo_mask = (1 << num_base_bits) - 1;
uint32_t predicted_row_index = 0;
uint8_t predicted_col_index = 0;
for (uint32_t pair_index = 0; pair_index < num_pairs_to_encode; pair_index++) {
const uint32_t row_col = pair_array[pair_index];
const uint32_t row_index = row_col >> 6;
const uint8_t col_index = row_col & 63;
if (row_index != predicted_row_index) predicted_col_index = 0;
if (row_index < predicted_row_index) throw std::logic_error("row_index < predicted_row_index");
if (col_index < predicted_col_index) throw std::logic_error("col_index < predicted_col_index");
const uint32_t y_delta = row_index - predicted_row_index;
const uint8_t x_delta = col_index - predicted_col_index;
predicted_row_index = row_index;
predicted_col_index = col_index + 1;
const uint16_t code_info = length_limited_unary_encoding_table65[x_delta];
const uint64_t code_val = code_info & 0xfff;
const uint8_t code_len = static_cast<uint8_t>(code_info >> 12);
bitbuf |= code_val << bufbits;
bufbits += code_len;
maybe_flush_bitbuf(bitbuf, bufbits, compressed_words, next_word_index);
const uint64_t golomb_lo = y_delta & golomb_lo_mask;
const uint64_t golomb_hi = y_delta >> num_base_bits;
write_unary(compressed_words, next_word_index, bitbuf, bufbits, golomb_hi);
bitbuf |= golomb_lo << bufbits;
bufbits += num_base_bits;
maybe_flush_bitbuf(bitbuf, bufbits, compressed_words, next_word_index);
}
const uint8_t padding = (num_base_bits > 10) ? 0 : 10 - num_base_bits;
bufbits += padding;
maybe_flush_bitbuf(bitbuf, bufbits, compressed_words, next_word_index);
if (bufbits > 0) { if (bufbits >= 32) throw std::logic_error("bufbits >= 32");
compressed_words[next_word_index++] = bitbuf & 0xffffffff;
bitbuf = 0; bufbits = 0; }
return next_word_index;
}
template<typename A>
void cpc_compressor<A>::low_level_uncompress_pairs(
uint32_t* pair_array, uint32_t num_pairs_to_decode,
uint8_t num_base_bits,
const uint32_t* compressed_words, uint32_t num_compressed_words
) const {
uint32_t word_index = 0;
uint64_t bitbuf = 0;
uint8_t bufbits = 0;
const uint64_t golomb_lo_mask = (1 << num_base_bits) - 1;
uint32_t predicted_row_index = 0;
uint8_t predicted_col_index = 0;
for (uint32_t pair_index = 0; pair_index < num_pairs_to_decode; pair_index++) {
maybe_fill_bitbuf(bitbuf, bufbits, compressed_words, word_index, 12); const size_t peek12 = bitbuf & 0xfff;
const uint16_t lookup = length_limited_unary_decoding_table65[peek12];
const uint8_t code_word_length = lookup >> 8;
const int8_t x_delta = lookup & 0xff;
bitbuf >>= code_word_length;
bufbits -= code_word_length;
const uint64_t golomb_hi = read_unary(compressed_words, word_index, bitbuf, bufbits);
maybe_fill_bitbuf(bitbuf, bufbits, compressed_words, word_index, num_base_bits); const uint64_t golomb_lo = bitbuf & golomb_lo_mask;
bitbuf >>= num_base_bits;
bufbits -= num_base_bits;
const int64_t y_delta = (golomb_hi << num_base_bits) | golomb_lo;
if (y_delta > 0) predicted_col_index = 0;
const uint32_t row_index = static_cast<uint32_t>(predicted_row_index + y_delta);
const uint8_t col_index = predicted_col_index + x_delta;
const uint32_t row_col = (row_index << 6) | col_index;
pair_array[pair_index] = row_col;
predicted_row_index = row_index;
predicted_col_index = col_index + 1;
}
if (word_index > num_compressed_words) throw std::logic_error("word_index > num_compressed_words"); }
uint64_t read_unary(
const uint32_t* compressed_words,
uint32_t& next_word_index,
uint64_t& bitbuf,
uint8_t& bufbits
) {
if (compressed_words == nullptr) throw std::logic_error("compressed_words == NULL");
size_t subtotal = 0;
while (true) {
maybe_fill_bitbuf(bitbuf, bufbits, compressed_words, next_word_index, 8);
const uint8_t peek8 = bitbuf & 0xff; const uint8_t trailing_zeros = byte_trailing_zeros_table[peek8];
if (trailing_zeros > 8) throw std::out_of_range("trailing_zeros out of range");
if (trailing_zeros < 8) {
bufbits -= 1 + trailing_zeros;
bitbuf >>= 1 + trailing_zeros;
return subtotal + trailing_zeros;
}
subtotal += 8;
bufbits -= 8;
bitbuf >>= 8;
}
}
void write_unary(
uint32_t* compressed_words,
uint32_t& next_word_index,
uint64_t& bitbuf,
uint8_t& bufbits,
uint64_t value
) {
if (compressed_words == nullptr) throw std::logic_error("compressed_words == NULL");
if (bufbits > 31) throw std::out_of_range("bufbits out of range");
uint64_t remaining = value;
while (remaining >= 16) {
remaining -= 16;
bufbits += 16; maybe_flush_bitbuf(bitbuf, bufbits, compressed_words, next_word_index);
}
if (remaining > 15) throw std::out_of_range("remaining out of range");
const uint64_t the_unary_code = 1ULL << remaining;
bitbuf |= the_unary_code << bufbits;
bufbits += static_cast<uint8_t>(remaining + 1);
maybe_flush_bitbuf(bitbuf, bufbits, compressed_words, next_word_index);
}
template<typename A>
vector_u32<A> cpc_compressor<A>::tricky_get_pairs_from_window(const uint8_t* window, uint32_t k, uint32_t num_pairs_to_get,
uint32_t empty_space, const A& allocator) {
const size_t output_length = empty_space + num_pairs_to_get;
vector_u32<A> pairs(output_length, 0, allocator);
size_t pair_index = empty_space;
for (unsigned row_index = 0; row_index < k; row_index++) {
uint8_t byte = window[row_index];
while (byte != 0) {
const uint8_t col_index = byte_trailing_zeros_table[byte];
byte = byte ^ (1 << col_index); pairs[pair_index++] = (row_index << 6) | col_index;
}
}
if (pair_index != output_length) throw std::logic_error("pair_index != output_length");
return pairs;
}
template<typename A>
uint8_t cpc_compressor<A>::golomb_choose_number_of_base_bits(uint32_t k, uint64_t count) {
if (k < 1) throw std::invalid_argument("golomb_choose_number_of_base_bits: k < 1");
if (count < 1) throw std::invalid_argument("golomb_choose_number_of_base_bits: count < 1");
const uint64_t quotient = (k - count) / count; if (quotient == 0) return 0;
else return floor_log2_of_long(quotient);
}
}
#endif