#ifndef FASTPFOR_H_
#define FASTPFOR_H_
#include "blockpacking.h"
#include "codecs.h"
#include "common.h"
#include "cpubenchmark.h"
#include "packingvectors.h"
#include "simple8b.h"
namespace FastPForLib {
template <uint32_t BlockSizeInUnitsOfPackSize =
8,
typename IntType =
uint32_t >
class FastPForImpl {
public:
FastPForImpl(uint32_t ps = 65536)
: PageSize(ps),
bitsPageSize(gccbits(PageSize)),
datatobepacked(sizeof(IntType) * 8 + 1),
bytescontainer(PageSize + 3 * PageSize / BlockSize) {
assert(ps / BlockSize * BlockSize == ps);
assert(gccbits(static_cast<uint32_t>(BlockSizeInUnitsOfPackSize * PACKSIZE - 1)) <= 8);
}
enum {
PACKSIZE = 32,
overheadofeachexcept = 8,
overheadduetobits = 8,
overheadduetonmbrexcept = 8,
BlockSize = BlockSizeInUnitsOfPackSize * PACKSIZE
};
void resetBuffer() {
for (size_t i = 0; i < datatobepacked.size(); ++i) {
std::vector<IntType>().swap(datatobepacked[i]);
}
}
const uint32_t PageSize;
const uint32_t bitsPageSize;
std::vector<std::vector<IntType>> datatobepacked;
std::vector<uint8_t> bytescontainer;
const uint32_t *decodeArray(const uint32_t *in, const size_t length,
IntType *out, size_t &nvalue) {
const uint32_t *const initin(in);
const size_t mynvalue = *in;
++in;
if (mynvalue > nvalue) throw NotEnoughStorage(mynvalue);
nvalue = mynvalue;
const IntType *const finalout(out + nvalue);
while (out != finalout) {
size_t thisnvalue(0);
size_t thissize = static_cast<size_t>(
finalout > PageSize + out ? PageSize : (finalout - out));
__decodeArray(in, thisnvalue, out, thissize);
in += thisnvalue;
out += thissize;
}
if (initin + length < in) {
throw std::logic_error(
"Decode run over output buffer. Potential buffer overflow!");
}
resetBuffer(); return in;
}
void encodeArray(const IntType *in, const size_t length, uint32_t *out,
size_t &nvalue) {
checkifdivisibleby(length, BlockSize);
const IntType *const finalin(in + length);
*out++ = static_cast<uint32_t>(length);
const size_t oldnvalue = nvalue;
nvalue = 1;
while (in != finalin) {
size_t thissize = static_cast<size_t>(
finalin > PageSize + in ? PageSize : (finalin - in));
size_t thisnvalue(0);
__encodeArray(in, thissize, out, thisnvalue);
nvalue += thisnvalue;
out += thisnvalue;
in += thissize;
}
if (oldnvalue < nvalue) {
std::string msg;
msg = "It is possible we have a buffer overrun. You reported having "
"allocated "
+ std::to_string(oldnvalue * sizeof(uint32_t))
+ " bytes for the compressed data but we needed "
+ std::to_string(nvalue * sizeof(uint32_t))
+ " bytes. Please increase the available memory"
" for compressed data or check the value of the last parameter "
"provided "
" to the encodeArray method.";
throw std::logic_error(msg);
}
resetBuffer(); }
void getBestBFromData(const IntType *in, uint8_t &bestb, uint8_t &bestcexcept,
uint8_t &maxb) {
uint8_t bits = sizeof(IntType) * 8;
uint32_t freqs[65];
for (uint32_t k = 0; k <= bits; ++k) freqs[k] = 0;
for (uint32_t k = 0; k < BlockSize; ++k) {
freqs[asmbits(in[k])]++;
}
bestb = bits;
while (freqs[bestb] == 0) bestb--;
maxb = bestb;
uint32_t bestcost = bestb * BlockSize;
uint32_t cexcept = 0;
bestcexcept = static_cast<uint8_t>(cexcept);
for (uint32_t b = bestb - 1; b < bits; --b) {
cexcept += freqs[b + 1];
uint32_t thiscost = cexcept * overheadofeachexcept +
cexcept * (maxb - b) + b * BlockSize +
8; if (maxb - b == 1) thiscost -= cexcept;
if (thiscost < bestcost) {
bestcost = thiscost;
bestb = static_cast<uint8_t>(b);
bestcexcept = static_cast<uint8_t>(cexcept);
}
}
}
void __encodeArray(const IntType *in, const size_t length, uint32_t *out,
size_t &nvalue) {
uint32_t *const initout = out; checkifdivisibleby(length, BlockSize);
uint32_t *const headerout = out++; for (uint32_t k = 0; k < sizeof(IntType) * 8 + 1; ++k) {
datatobepacked[k].clear();
}
uint8_t *bc = &bytescontainer[0];
for (const IntType *const final = in + length; (in + BlockSize <= final);
in += BlockSize) {
uint8_t bestb, bestcexcept, maxb;
getBestBFromData(in, bestb, bestcexcept, maxb);
*bc++ = bestb;
*bc++ = bestcexcept;
if (bestcexcept > 0) {
*bc++ = maxb;
std::vector<IntType> &thisexceptioncontainer =
datatobepacked[maxb - bestb];
const IntType maxval = static_cast<IntType>(1ULL << bestb);
for (uint32_t k = 0; k < BlockSize; ++k) {
if (in[k] >= maxval) {
thisexceptioncontainer.push_back(in[k] >> bestb);
*bc++ = static_cast<uint8_t>(k);
}
}
}
out = packblockup<BlockSize>(in, out, bestb);
}
headerout[0] = static_cast<uint32_t>(out - headerout);
const uint32_t bytescontainersize =
static_cast<uint32_t>(bc - &bytescontainer[0]);
*(out++) = bytescontainersize;
memcpy(out, &bytescontainer[0], bytescontainersize);
uint8_t* pad8 = (uint8_t*)out + bytescontainersize;
out += (bytescontainersize + sizeof(uint32_t) - 1) / sizeof(uint32_t);
while (pad8 < (uint8_t*)out)
*pad8++ = 0;
IntType bitmap = 0;
for (uint32_t k = 2; k <= sizeof(IntType) * 8; ++k) {
if (datatobepacked[k].size() != 0) bitmap |= (1ULL << (k - 1));
}
*(reinterpret_cast<IntType *>(out)) = bitmap;
out += (sizeof(IntType) + sizeof(uint32_t) - 1) / sizeof(uint32_t);
for (uint32_t k = 2; k <= sizeof(IntType) * 8; ++k) {
if (datatobepacked[k].size() > 0) {
size_t nValue = datatobepacked[k].size();
datatobepacked[k].resize((datatobepacked[k].size() + 32 - 1) / 32 * 32);
out = packingvector<32>::packmeuptightwithoutmask(
datatobepacked[k].data(), nValue, out, k);
}
}
nvalue = out - initout;
}
void __decodeArray(const uint32_t *in, size_t &length, IntType *out,
const size_t nvalue) {
const uint32_t *const initin = in;
const uint32_t *const headerin = in++;
const uint32_t wheremeta = headerin[0];
const uint32_t *inexcept = headerin + wheremeta;
const uint32_t bytesize = *inexcept++;
const uint8_t *bytep = reinterpret_cast<const uint8_t *>(inexcept);
inexcept += (bytesize + sizeof(uint32_t) - 1) / sizeof(uint32_t);
IntType bitmap = *(reinterpret_cast<const IntType *>(inexcept));
inexcept += (sizeof(IntType) + sizeof(uint32_t) - 1) / sizeof(uint32_t);
for (uint32_t k = 2; k <= sizeof(IntType) * 8; ++k) {
if ((bitmap & (1ULL << (k - 1))) != 0) {
uint32_t tnvalue = *inexcept;
datatobepacked[k].resize((tnvalue + PACKSIZE - 1) / PACKSIZE * PACKSIZE);
inexcept = packingvector<32>::unpackmetight(
inexcept, datatobepacked[k].data(), datatobepacked[k].size(), k);
datatobepacked[k].resize(tnvalue);
}
}
length = inexcept - initin;
typename std::vector<IntType>::const_iterator
unpackpointers[sizeof(IntType) * 8 + 1];
for (uint32_t k = 1; k <= sizeof(IntType) * 8; ++k) {
unpackpointers[k] = datatobepacked[k].begin();
}
for (uint32_t run = 0; run < nvalue / BlockSize; ++run, out += BlockSize) {
const uint8_t b = *bytep++;
const uint8_t cexcept = *bytep++;
in = unpackblock<BlockSize>(in, out, b);
if (cexcept > 0) {
const uint8_t maxbits = *bytep++;
if (maxbits - b == 1) {
for (uint32_t k = 0; k < cexcept; ++k) {
const uint8_t pos = *(bytep++);
out[pos] |= static_cast<uint64_t>(1) << b;
}
} else {
typename std::vector<IntType>::const_iterator &exceptionsptr =
unpackpointers[maxbits - b];
for (uint32_t k = 0; k < cexcept; ++k) {
const uint8_t pos = *(bytep++);
out[pos] |= (static_cast<uint64_t>(*(exceptionsptr++))) << b;
}
}
}
}
assert(in == headerin + wheremeta);
}
};
template <uint32_t BlockSizeInUnitsOfPackSize = 8>
class FastPFor : public IntegerCODEC {
public:
FastPFor(uint32_t pageSize = 65536) : pfor32(pageSize) {}
enum {
PACKSIZE = 32,
overheadofeachexcept = 8,
overheadduetobits = 8,
overheadduetonmbrexcept = 8,
BlockSize = BlockSizeInUnitsOfPackSize * PACKSIZE
};
void encodeArray(const uint32_t *in, const size_t length, uint32_t *out,
size_t &nvalue) override {
pfor32.encodeArray(in, length, out, nvalue);
}
void encodeArray(const uint64_t *in, const size_t length, uint32_t *out,
size_t &nvalue) override {
pfor64.encodeArray(in, length, out, nvalue);
}
const uint32_t *decodeArray(const uint32_t *in, const size_t length,
uint32_t *out, size_t &nvalue) override {
return pfor32.decodeArray(in, length, out, nvalue);
}
const uint32_t *decodeArray(const uint32_t *in, const size_t length,
uint64_t *out, size_t &nvalue) override {
return pfor64.decodeArray(in, length, out, nvalue);
}
std::string name() const override {
return std::string("FastPFor") + std::to_string(BlockSize);
}
private:
FastPForImpl<BlockSizeInUnitsOfPackSize, uint32_t> pfor32;
FastPForImpl<BlockSizeInUnitsOfPackSize, uint64_t> pfor64;
};
template <class EXCEPTIONCODER = Simple8b<true>>
class SimplePFor : public IntegerCODEC {
public:
using IntegerCODEC::encodeArray;
using IntegerCODEC::decodeArray;
EXCEPTIONCODER ecoder;
SimplePFor(uint32_t ps = 65536)
: ecoder(), PageSize(ps), bitsPageSize(gccbits(PageSize)),
datatobepacked(PageSize),
bytescontainer(PageSize + 3 * PageSize / BlockSize) {
assert(ps / BlockSize * BlockSize == ps);
assert(gccbits(static_cast<uint32_t>(BlockSizeInUnitsOfPackSize * PACKSIZE - 1)) <= 8);
}
enum {
BlockSizeInUnitsOfPackSize = 4,
PACKSIZE = 32,
overheadofeachexcept = 8,
overheadduetobits = 8,
overheadduetonmbrexcept = 8,
BlockSize = BlockSizeInUnitsOfPackSize * PACKSIZE
};
const uint32_t PageSize;
const uint32_t bitsPageSize;
std::vector<uint32_t> datatobepacked;
std::vector<uint8_t> bytescontainer;
const uint32_t *decodeArray(const uint32_t *in, const size_t length,
uint32_t *out, size_t &nvalue) override {
const uint32_t *const initin(in);
const size_t mynvalue = *in;
++in;
if (mynvalue > nvalue)
throw NotEnoughStorage(mynvalue);
nvalue = mynvalue;
const uint32_t *const finalout(out + nvalue);
while (out != finalout) {
size_t thisnvalue = length - (in - initin);
size_t thissize = static_cast<size_t>(
finalout > PageSize + out ? PageSize : (finalout - out));
__decodeArray(in, thisnvalue, out, thissize);
in += thisnvalue;
out += thissize;
}
assert(initin + length >= in);
return in;
}
void encodeArray(const uint32_t *in, const size_t length, uint32_t *out,
size_t &nvalue) override {
checkifdivisibleby(length, BlockSize);
const uint32_t *const initout(out);
const uint32_t *const finalin(in + length);
*out++ = static_cast<uint32_t>(length);
const size_t oldnvalue = nvalue;
nvalue = 1;
while (in != finalin) {
size_t thissize = static_cast<size_t>(
finalin > PageSize + in ? PageSize : (finalin - in));
size_t thisnvalue = oldnvalue - (out - initout);
__encodeArray(in, thissize, out, thisnvalue);
nvalue += thisnvalue;
out += thisnvalue;
in += thissize;
}
assert(out == nvalue + initout);
if (oldnvalue < nvalue)
fprintf(stderr,
"It is possible we have a buffer overrun. You reported having allocated "
"%zu bytes for the compressed data but we needed "
"%zu bytes. Please increase the available memory "
"for compressed data or check the value of the last parameter provided "
"to the encodeArray method.\n",
oldnvalue * sizeof(uint32_t), nvalue * sizeof(uint32_t));
}
void getBestBFromData(const uint32_t *in, uint8_t &bestb,
uint8_t &bestcexcept, uint8_t &maxb) {
uint32_t freqs[33];
for (uint32_t k = 0; k <= 32; ++k)
freqs[k] = 0;
for (uint32_t k = 0; k < BlockSize; ++k) {
freqs[asmbits(in[k])]++;
}
bestb = 32;
while (freqs[bestb] == 0)
bestb--;
maxb = bestb;
uint32_t bestcost = bestb * BlockSize;
uint32_t cexcept = 0;
bestcexcept = static_cast<uint8_t>(cexcept);
for (uint32_t b = bestb - 1; b < 32; --b) {
cexcept += freqs[b + 1];
uint32_t thiscost =
cexcept * overheadofeachexcept + cexcept * (maxb - b) + b * BlockSize;
if (thiscost < bestcost) {
bestcost = thiscost;
bestb = static_cast<uint8_t>(b);
bestcexcept = static_cast<uint8_t>(cexcept);
}
}
}
void __encodeArray(const uint32_t *in, const size_t length, uint32_t *out,
size_t &nvalue) {
uint32_t *const initout = out; checkifdivisibleby(length, BlockSize);
uint32_t *const headerout = out++; datatobepacked.clear();
uint8_t *bc = &bytescontainer[0];
for (const uint32_t *const final = in + length; (in + BlockSize <= final);
in += BlockSize) {
uint8_t bestb, bestcexcept, maxb;
getBestBFromData(in, bestb, bestcexcept, maxb);
*bc++ = bestb;
*bc++ = bestcexcept;
if (bestcexcept > 0) {
assert(bestb < 32);
const uint32_t maxval = 1U << bestb;
for (uint32_t k = 0; k < BlockSize; ++k) {
if (in[k] >= maxval) {
datatobepacked.push_back(in[k] >> bestb);
*bc++ = static_cast<uint8_t>(k);
}
}
}
out = packblockup<BlockSize>(in, out, bestb);
}
headerout[0] = static_cast<uint32_t>(out - headerout);
const uint32_t bytescontainersize =
static_cast<uint32_t>(bc - &bytescontainer[0]);
*(out++) = bytescontainersize;
memcpy(out, &bytescontainer[0], bytescontainersize);
uint8_t* pad8 = (uint8_t*)out + bytescontainersize;
out += (bytescontainersize + sizeof(uint32_t) - 1) / sizeof(uint32_t);
while (pad8 < (uint8_t*)out)
*pad8++ = 0;
size_t outcap = 0;
ecoder.encodeArray(datatobepacked.data(), datatobepacked.size(), out,
outcap);
out += outcap;
nvalue = out - initout;
}
void __decodeArray(const uint32_t *in, size_t &length, uint32_t *out,
const size_t nvalue) {
const uint32_t *const initin = in;
const uint32_t *const headerin = in++;
const uint32_t wheremeta = headerin[0];
const uint32_t *inexcept = headerin + wheremeta;
const uint32_t bytesize = *inexcept++;
const uint8_t *bytep = reinterpret_cast<const uint8_t *>(inexcept);
inexcept += (bytesize + sizeof(uint32_t) - 1) / sizeof(uint32_t);
datatobepacked.resize(datatobepacked.capacity());
size_t cap = datatobepacked.size();
size_t le = initin + length - inexcept;
inexcept = ecoder.decodeArray(inexcept, le, &datatobepacked[0], cap);
length = inexcept - initin;
auto exceptionsptr = datatobepacked.begin();
for (uint32_t run = 0; run < nvalue / BlockSize; ++run, out += BlockSize) {
const uint8_t b = *bytep++;
const uint8_t cexcept = *bytep++;
in = unpackblock<BlockSize>(in, out, b);
for (uint32_t k = 0; k < cexcept; ++k) {
const uint8_t pos = *(bytep++);
out[pos] |= (*(exceptionsptr++)) << b;
}
}
assert(in == headerin + wheremeta);
}
std::string name() const override { return "SimplePFor"; }
};
}
#endif