#pragma once
#include "FastPFor/headers/VarIntG8IU.h"
#include "FastPFor/headers/optpfor.h"
#include "FastPFor/headers/variablebyte.h"
#include "interpolative_coding.hpp"
#include "qmx_codec.hpp"
#include "succinct/util.hpp"
#include "util.hpp"
namespace ds2i {
class TightVariableByte {
public:
template <uint32_t i> static uint8_t extract7bits(const uint32_t val) {
return static_cast<uint8_t>((val >> (7 * i)) & ((1U << 7) - 1));
}
template <uint32_t i>
static uint8_t extract7bitsmaskless(const uint32_t val) {
return static_cast<uint8_t>((val >> (7 * i)));
}
static void encode(const uint32_t *in, const size_t length, uint8_t *out,
size_t &nvalue) {
uint8_t *bout = out;
for (size_t k = 0; k < length; ++k) {
const uint32_t val(in[k]);
if (val < (1U << 7)) {
*bout = static_cast<uint8_t>(val | (1U << 7));
++bout;
} else if (val < (1U << 14)) {
*bout = extract7bits<0>(val);
++bout;
*bout = extract7bitsmaskless<1>(val) | (1U << 7);
++bout;
} else if (val < (1U << 21)) {
*bout = extract7bits<0>(val);
++bout;
*bout = extract7bits<1>(val);
++bout;
*bout = extract7bitsmaskless<2>(val) | (1U << 7);
++bout;
} else if (val < (1U << 28)) {
*bout = extract7bits<0>(val);
++bout;
*bout = extract7bits<1>(val);
++bout;
*bout = extract7bits<2>(val);
++bout;
*bout = extract7bitsmaskless<3>(val) | (1U << 7);
++bout;
} else {
*bout = extract7bits<0>(val);
++bout;
*bout = extract7bits<1>(val);
++bout;
*bout = extract7bits<2>(val);
++bout;
*bout = extract7bits<3>(val);
++bout;
*bout = extract7bitsmaskless<4>(val) | (1U << 7);
++bout;
}
}
nvalue = bout - out;
}
static void encode_single(uint32_t val, std::vector<uint8_t> &out) {
uint8_t buf[5];
size_t nvalue;
encode(&val, 1, buf, nvalue);
out.insert(out.end(), buf, buf + nvalue);
}
static uint8_t const *decode(const uint8_t *in, uint32_t *out, size_t n) {
const uint8_t *inbyte = in;
for (size_t i = 0; i < n; ++i) {
unsigned int shift = 0;
for (uint32_t v = 0;; shift += 7) {
uint8_t c = *inbyte++;
v += ((c & 127) << shift);
if ((c & 128)) {
*out++ = v;
break;
}
}
}
return inbyte;
}
};
struct interpolative_block {
static const uint64_t block_size = 128;
static const uint64_t overflow = 0;
static void encode(uint32_t const *in, uint32_t sum_of_values, size_t n,
std::vector<uint8_t> &out) {
assert(n <= block_size);
thread_local std::vector<uint32_t> inbuf(block_size);
thread_local std::vector<uint32_t> outbuf;
inbuf[0] = *in;
for (size_t i = 1; i < n; ++i) {
inbuf[i] = inbuf[i - 1] + in[i];
}
if (sum_of_values == uint32_t(-1)) {
sum_of_values = inbuf[n - 1];
TightVariableByte::encode_single(sum_of_values, out);
}
bit_writer bw(outbuf);
bw.write_interpolative(inbuf.data(), n - 1, 0, sum_of_values);
uint8_t const *bufptr = (uint8_t const *)outbuf.data();
out.insert(out.end(), bufptr,
bufptr + succinct::util::ceil_div(bw.size(), 8));
}
static uint8_t const *DS2I_NOINLINE decode(uint8_t const *in, uint32_t *out,
uint32_t sum_of_values, size_t n) {
assert(n <= block_size);
uint8_t const *inbuf = in;
if (sum_of_values == uint32_t(-1)) {
inbuf = TightVariableByte::decode(inbuf, &sum_of_values, 1);
}
out[n - 1] = sum_of_values;
size_t read_interpolative = 0;
if (n > 1) {
bit_reader br((uint32_t const *)inbuf);
br.read_interpolative(out, n - 1, 0, sum_of_values);
for (size_t i = n - 1; i > 0; --i) {
out[i] -= out[i - 1];
}
read_interpolative = succinct::util::ceil_div(br.position(), 8);
}
return inbuf + read_interpolative;
}
};
struct optpfor_block {
struct codec_type : FastPFor::OPTPFor<4, FastPFor::Simple16<false>> {
uint8_t const *force_b;
uint32_t findBestB(const uint32_t *in, uint32_t len) {
if (force_b) {
return *force_b;
}
uint32_t b = 0;
uint32_t bsize = std::numeric_limits<uint32_t>::max();
const uint32_t mb = FastPFor::maxbits(in, in + len);
uint32_t i = 0;
while (mb > 28 + possLogs[i])
++i;
for (; i < possLogs.size(); i++) {
if (possLogs[i] > mb && possLogs[i] >= mb)
break;
const uint32_t csize = tryB(possLogs[i], in, len);
if (csize <= bsize) {
b = possLogs[i];
bsize = csize;
}
}
return b;
}
};
static const uint64_t block_size = codec_type::BlockSize;
static const uint64_t overflow = 0;
static void encode(uint32_t const *in, uint32_t sum_of_values, size_t n,
std::vector<uint8_t> &out,
uint8_t const *b = nullptr) {
thread_local codec_type optpfor_codec;
thread_local std::vector<uint8_t> buf(2 * 4 * block_size);
assert(n <= block_size);
if (n < block_size) {
interpolative_block::encode(in, sum_of_values, n, out);
return;
}
size_t out_len = buf.size();
optpfor_codec.force_b = b;
optpfor_codec.encodeBlock(in, reinterpret_cast<uint32_t *>(buf.data()),
out_len);
out_len *= 4;
out.insert(out.end(), buf.data(), buf.data() + out_len);
}
static uint8_t const *DS2I_NOINLINE decode(uint8_t const *in, uint32_t *out,
uint32_t sum_of_values, size_t n) {
thread_local codec_type optpfor_codec; assert(n <= block_size);
if (DS2I_UNLIKELY(n < block_size)) {
return interpolative_block::decode(in, out, sum_of_values, n);
}
size_t out_len = block_size;
uint8_t const *ret;
ret = reinterpret_cast<uint8_t const *>(optpfor_codec.decodeBlock(
reinterpret_cast<uint32_t const *>(in), out, out_len));
assert(out_len == n);
return ret;
}
};
struct varint_G8IU_block {
static const uint64_t block_size = 128;
static const uint64_t overflow = 0;
struct codec_type : FastPFor::VarIntG8IU {
uint32_t decodeBlock(uint8_t const *&src, uint32_t *dst) const {
uint8_t desc = *src;
src += 1;
const __m128i data =
_mm_lddqu_si128(reinterpret_cast<__m128i const *>(src));
src += 8;
const __m128i result = _mm_shuffle_epi8(data, vecmask[desc][0]);
_mm_storeu_si128(reinterpret_cast<__m128i *>(dst), result);
int readSize = maskOutputSize[desc];
if (readSize > 4) {
const __m128i result2 = _mm_shuffle_epi8(
data, vecmask[desc][1]); _mm_storeu_si128(
reinterpret_cast<__m128i *>(dst + 4),
result2); }
return readSize;
}
};
static void encode(uint32_t const *in, uint32_t sum_of_values, size_t n,
std::vector<uint8_t> &out) {
thread_local codec_type varint_codec;
thread_local std::vector<uint8_t> buf(2 * 4 * block_size);
assert(n <= block_size);
if (n < block_size) {
interpolative_block::encode(in, sum_of_values, n, out);
return;
}
size_t out_len = buf.size();
const uint32_t *src = in;
unsigned char *dst = buf.data();
size_t srclen = n * 4;
size_t dstlen = out_len;
out_len = 0;
while (srclen > 0 && dstlen >= 9) {
out_len += varint_codec.encodeBlock(src, srclen, dst, dstlen);
}
assert(srclen == 0);
out.insert(out.end(), buf.data(), buf.data() + out_len);
}
static uint8_t const *decode(uint8_t const *in, uint32_t *out,
uint32_t sum_of_values, size_t n) {
static codec_type varint_codec; assert(n <= block_size);
if (DS2I_UNLIKELY(n < block_size)) {
return interpolative_block::decode(in, out, sum_of_values, n);
}
size_t out_len = 0;
uint8_t const *src = in;
uint32_t *dst = out;
while (out_len <= (n - 8)) {
out_len += varint_codec.decodeBlock(src, dst + out_len);
}
while (out_len < n) {
uint32_t buf[8];
size_t read = varint_codec.decodeBlock(src, buf);
size_t needed = std::min(read, n - out_len);
memcpy(dst + out_len, buf, needed * 4);
out_len += needed;
}
assert(out_len == n);
return src;
}
};
struct qmx_block {
static const uint64_t block_size = 128;
static const uint64_t overflow = 512; static void encode(uint32_t const *in, uint32_t sum_of_values, size_t n,
std::vector<uint8_t> &out) {
assert(n <= block_size);
if (n < block_size) {
interpolative_block::encode(in, sum_of_values, n, out);
return;
}
thread_local QMX::codec<block_size> qmx_codec;
thread_local std::vector<uint8_t> buf( (overflow*4) + 2 * 4 * block_size);
size_t out_len = buf.size();
out_len = qmx_codec.encode(buf.data(),in);
TightVariableByte::encode_single(out_len, out);
out.insert(out.end(), buf.data(), buf.data() + out_len);
}
static uint8_t const *decode(uint8_t const *in, uint32_t *out,
uint32_t sum_of_values, size_t n) {
static QMX::codec<block_size> qmx_codec; assert(n <= block_size);
if (DS2I_UNLIKELY(n < block_size)) {
return interpolative_block::decode(in, out, sum_of_values, n);
}
uint32_t enc_len = 0;
in = TightVariableByte::decode(in, &enc_len, 1);
qmx_codec.decode(out,in, enc_len);
return in + enc_len;
}
};
}