#if (!defined(__SSSE3__) && !(defined(_MSC_VER) && defined(__AVX__))) && !(defined(__ARM_NEON) || defined(__aarch64__))
#ifndef _MSC_VER
#pragma message \
"Disabling varintg8iu due to lack of SSSE3 support, try adding -mssse3 or the equivalent on your compiler"
#else
#pragma message("Disabling varintg8iu due to lack of SSSE3 support, try adding -mssse3 or the equivalent on your compiler")
#endif
#else
#ifndef VARINTG8IU_H__
#define VARINTG8IU_H__
#if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
#include <emmintrin.h>
#elif defined(__aarch64__)
#include <simde/x86/sse3.h>
#endif
#include <cstring>
#include "codecs.h"
// PREDICT_FALSE(x): evaluates to the condition `x`, hinting (where the
// compiler supports it) that the condition is expected to be false.
// The argument and the whole expansion are parenthesised so the macro behaves
// like a single primary expression regardless of surrounding operators.
#ifdef __GNUC__
#define PREDICT_FALSE(x) (__builtin_expect((x), 0))
#else
// No branch hint available: expand to the expression itself.
#define PREDICT_FALSE(x) (x)
#endif
namespace FastPForLib {
/**
 * Group-varint integer codec operating on fixed 9-byte blocks.
 *
 * Every encoded block is one descriptor byte followed by 8 data bytes. Each
 * integer occupies 1 to 4 data bytes (little-endian, leading zero bytes
 * dropped) and never straddles a block. Bit i of the descriptor is 0 when
 * data byte i is the last byte of an integer and 1 otherwise; unused trailing
 * data bytes are zero and keep their descriptor bits set to 1.
 *
 * Decoding expands a block with byte shuffles driven by two 16-byte shuffle
 * masks per descriptor value, precomputed in the constructor.
 */
class VarIntG8IU : public IntegerCODEC {
public:
  /**
   * Builds, for each of the 256 descriptor values, the number of integers the
   * block holds (maskOutputSize) and the two shuffle masks (vecmask) that
   * scatter the 8 data bytes into up to eight 32-bit output lanes.
   */
  VarIntG8IU() {
    char mask[256][32];
    // A shuffle control byte with its high bit set yields a zero output byte.
    // Pre-fill the whole scratch table with -1 so that the lanes the loop
    // below never writes (descriptors holding fewer than 8 integers) are
    // well-defined instead of indeterminate stack contents.
    memset(mask, -1, sizeof(mask));

    for (int desc = 0; desc <= 255; desc++) {
      int bitmask = 0x00000001;
      int bitindex = 0;
      int complete = 0; // number of integers terminated inside this block
      int ithSize[8];   // byte length of each terminated integer
      int lastpos = -1; // bit index of the previous terminator
      while (bitindex < 8) {
        if ((desc & bitmask) == 0) {
          // A cleared bit marks the last byte of an integer.
          ithSize[complete] = bitindex - lastpos;
          lastpos = bitindex;
          complete++;
        }
        bitindex++;
        bitmask = bitmask << 1;
      }
      maskOutputSize[desc] = complete;

      int j = 0; // next source byte index within the 8 data bytes
      int k = 0; // next control byte index within the 32-byte mask
      for (int i = 0; i < complete; i++) {
        // Each integer expands to 4 output bytes: its own data bytes first,
        // then zero-producing control bytes for the dropped high bytes.
        for (int n = 0; n < 4; n++) {
          if (n < ithSize[i]) {
            mask[desc][k] = static_cast<unsigned char>(j);
            j = j + 1;
          } else {
            mask[desc][k] = -1;
          }
          k = k + 1;
        }
      }
    }

    for (int desc = 0; desc <= 255; desc++) {
      vecmask[desc][0] =
          _mm_lddqu_si128(reinterpret_cast<__m128i const *>(mask[desc]));
      vecmask[desc][1] =
          _mm_lddqu_si128(reinterpret_cast<__m128i const *>(mask[desc] + 16));
    }
  }

  /**
   * Encodes `length` 32-bit integers from `in` into `out`.
   *
   * @param in      input integers
   * @param length  number of input integers
   * @param out     output buffer
   * @param nvalue  in: capacity of `out` in 32-bit words;
   *                out: number of 32-bit words written (the byte stream is
   *                zero-padded up to a whole word)
   *
   * NOTE: encoding stops silently once fewer than 9 bytes of output capacity
   * remain, even if input is left over; no error is reported in that case.
   */
  void encodeArray(const uint32_t *in, const size_t length, uint32_t *out,
                   size_t &nvalue) {
    const uint32_t *src = in;
    size_t srclength = length * 4; // remaining input, in bytes
    unsigned char *dst = reinterpret_cast<unsigned char *>(out);
    nvalue = nvalue * 4; // remaining output capacity, in bytes

    size_t compressed_size = 0;
    while (srclength > 0 && nvalue >= 9) {
      compressed_size += encodeBlock(src, srclength, dst, nvalue);
    }

    // The byte count may not be a multiple of 4: round up to whole words and
    // zero the padding bytes.
    nvalue = ((compressed_size + 3) / 4);
    while (dst < reinterpret_cast<unsigned char *>(out + nvalue)) {
      *dst++ = 0;
    }
  }

  /**
   * Decodes a stream produced by encodeArray().
   *
   * @param in      encoded input
   * @param length  size of the encoded input in 32-bit words
   * @param out     destination for the decoded integers
   * @param nvalue  out only: number of integers written to `out` (the value
   *                passed in is not read)
   * @return the consumed-input position rounded up to a multiple of 4 bytes
   *
   * NOTE: the fast path stores 16 (or 32) bytes at the current output
   * position regardless of how many integers the block actually holds, so
   * `out` needs slack beyond the decoded data.
   */
  const uint32_t *decodeArray(const uint32_t *in, const size_t length,
                              uint32_t *out, size_t &nvalue) {
    const unsigned char *src = reinterpret_cast<const unsigned char *>(in);
    const uint32_t *const initdst = out;
    uint32_t *dst = out;
    size_t srclength = length * 4; // remaining input, in bytes

    // Fast path: enough input remains to load 16 bytes straight from the
    // stream. Each iteration consumes 9 bytes (1 here for the descriptor,
    // 8 in the loop increment).
    for (; srclength >= 22; srclength -= 8, src += 8) {
      const unsigned char desc = *src;
      src += 1;
      srclength -= 1;
      const __m128i data =
          _mm_lddqu_si128(reinterpret_cast<__m128i const *>(src));
      const __m128i result = _mm_shuffle_epi8(data, vecmask[desc][0]);
      _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), result);
      const int readSize = maskOutputSize[desc];
      if (readSize > 4) {
        // More than four integers: the second mask produces lanes 4..7.
        const __m128i result2 = _mm_shuffle_epi8(data, vecmask[desc][1]);
        _mm_storeu_si128(reinterpret_cast<__m128i *>(dst + 4), result2);
      }
      dst += readSize;
    }

    // Tail: go through a local buffer so that neither the 16-byte load nor
    // the 16/32-byte store touches memory past the end of the input/output.
    while (srclength >= 9) {
      const unsigned char desc = *src;
      src += 1;
      srclength -= 1;
      // Zero-initialised so the 16-byte load below never reads indeterminate
      // bytes (only the first 8 come from the stream).
      char buff[32] = {0};
      memcpy(buff, src, 8);
      const __m128i data =
          _mm_lddqu_si128(reinterpret_cast<__m128i const *>(buff));
      const __m128i result = _mm_shuffle_epi8(data, vecmask[desc][0]);
      _mm_storeu_si128(reinterpret_cast<__m128i *>(buff), result);
      const int readSize = maskOutputSize[desc];
      if (readSize > 4) {
        const __m128i result2 = _mm_shuffle_epi8(data, vecmask[desc][1]);
        _mm_storeu_si128(reinterpret_cast<__m128i *>(buff + 16), result2);
      }
      memcpy(dst, buff, 4 * readSize);
      dst += readSize;
      srclength -= 8;
      src += 8;
    }

    nvalue = (dst - initdst);
    // Round the consumed position up to the next multiple of 4 bytes.
    return reinterpret_cast<uint32_t *>((reinterpret_cast<uintptr_t>(src) + 3) &
                                        ~3);
  }

  /** Returns the codec's name, "VarIntG8IU". */
  virtual std::string name() const { return std::string("VarIntG8IU"); }

  /**
   * Encodes as many leading integers of `src` as fit in 8 data bytes and
   * writes one 9-byte block (descriptor + zero-padded data) to `dest`.
   *
   * @param src        in/out: advanced past the integers consumed
   * @param srclength  in/out: remaining input in bytes, reduced by 4 per
   *                   integer consumed (callers in this class always pass a
   *                   multiple of 4)
   * @param dest       in/out: advanced by 9
   * @param dstlength  in/out: remaining output capacity in bytes, reduced by
   *                   9; not checked here, the caller must ensure it is >= 9
   * @return the number of bytes written, always 9
   */
  int encodeBlock(const uint32_t *&src, size_t &srclength, unsigned char *&dest,
                  size_t &dstlength) {
    unsigned char desc = 0xFF;    // all bits set; terminator bits get cleared
    unsigned char bitmask = 0x01; // tracks the first data byte of the next int
    uint32_t buffer[8];
    int ithSize[8];
    int length = 0; // data bytes used so far in this block
    int numInt = 0;

    while (srclength > 0) {
      const uint32_t *temp = src;
      int byteNeeded = getNumByteNeeded(*temp);
      if (PREDICT_FALSE(length + byteNeeded > 8)) {
        break; // the next integer does not fit; it starts the next block
      }
      // Move to the last byte of this integer and clear its descriptor bit,
      // then step to the first byte of the following integer.
      bitmask = static_cast<unsigned char>(bitmask << (byteNeeded - 1));
      desc = desc ^ bitmask;
      bitmask = static_cast<unsigned char>(bitmask << 1);
      ithSize[numInt] = byteNeeded;
      length += byteNeeded;
      buffer[numInt] = *temp;
      src = src + 1;
      srclength -= 4;
      numInt++;
    }

    dest[0] = desc;
    int written = 1;
    for (int i = 0; i < numInt; i++) {
      int size = ithSize[i];
      uint32_t value = buffer[i];
      // Emit the low `size` bytes, least significant first.
      for (int j = 0; j < size; j++) {
        dest[written] = static_cast<unsigned char>(value >> (j * 8));
        written++;
      }
    }
    // Pad the unused data bytes with zeros.
    while (written < 9) {
      dest[written++] = 0;
    }
    dest += 9;
    dstlength -= 9;
    return 9;
  }

private:
  int maskOutputSize[256];  // integers held by a block, per descriptor value
  __m128i vecmask[256][2];  // shuffle masks for output lanes 0..3 and 4..7

  /**
   * Returns how many bytes (1..4) are needed to store `val` without its
   * leading zero bytes. OR-ing with 255 keeps the argument of the
   * count-leading-zeros builtin non-zero and makes the minimum result 1.
   */
  int getNumByteNeeded(const uint32_t val) {
    return ((__builtin_clz(val | 255) ^ 31) >> 3) + 1;
  }
};
}
#endif // VARINTG8IU_H__
#endif // SSSE3 / NEON availability check