#define HTS_BUILDING_LIBRARY
#include <config.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <limits.h>
#include <stdint.h>
#include <errno.h>
#include <stddef.h>
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
#include "../fuzz_settings.h"
#endif
#include "../htslib/hts_endian.h"
#if defined(HAVE_EXTERNAL_LIBHTSCODECS)
#include <htscodecs/varint.h>
#include <htscodecs/pack.h>
#include <htscodecs/rle.h>
#else
#include "../htscodecs/htscodecs/varint.h"
#include "../htscodecs/htscodecs/pack.h"
#include "../htscodecs/htscodecs/rle.h"
#endif
#include "cram.h"
#if 0#endif
/*
 * Consumes a run of 1 bits, plus the 0 bit that ends it, starting at the
 * block's current (byte, bit) position.
 *
 * Returns the number of 1 bits seen, or
 *         -1 if the block is already exhausted or ends on a 1 bit.
 */
static int get_one_bits_MSB(cram_block *block) {
    int ones = 0;

    if (block->byte >= block->uncomp_size)
        return -1;

    for (;;) {
        int cur = block->data[block->byte] >> block->bit;

        /* Step to the next bit, rolling over into the next byte. */
        block->bit--;
        if (block->bit == -1) {
            block->bit = 7;
            block->byte++;
            if ((cur & 1) && block->byte == block->uncomp_size)
                return -1;
        }

        if (!(cur & 1))
            return ones;
        ones++;
    }
}
/*
 * Consumes a run of 0 bits, plus the 1 bit that ends it, starting at the
 * block's current (byte, bit) position.
 *
 * Returns the number of 0 bits seen, or
 *         -1 if the block is already exhausted or ends on a 0 bit.
 */
static int get_zero_bits_MSB(cram_block *block) {
    int zeros = 0;

    if (block->byte >= block->uncomp_size)
        return -1;

    for (;;) {
        int cur = block->data[block->byte] >> block->bit;

        /* Step to the next bit, rolling over into the next byte. */
        block->bit--;
        if (block->bit == -1) {
            block->bit = 7;
            block->byte++;
            if (!(cur & 1) && block->byte == block->uncomp_size)
                return -1;
        }

        if (cur & 1)
            return zeros;
        zeros++;
    }
}
#if 0#endif
#if 0#endif
/*
 * Reads nbits bits from the block at its current (byte, bit) position and
 * returns them as an integer.
 *
 * If the request fits in what is left of the current byte, the value is
 * extracted with one shift and mask.  Otherwise the bits are gathered one
 * at a time with GET_BIT_MSB (defined outside this view; presumably it
 * shifts the next bit into val and advances the position -- TODO confirm).
 *
 * The single-byte fast path does no bounds check against the block size.
 */
static inline int64_t get_bits_MSB(cram_block *block, int nbits) {
uint64_t val = 0;
int i;
/* NOTE(review): the "#if 0#endif" lines below look like residue of
 * stripped-out disabled code; confirm against the upstream file. */
#if 0#endif
#if 0#endif
#if 1
/* Fast path: all requested bits live in the current byte. */
if (nbits <= block->bit+1) {
val = (block->data[block->byte]>>(block->bit-(nbits-1))) & ((1<<nbits)-1);
if ((block->bit -= nbits) == -1) {
/* Current byte fully consumed; move to the top bit of the next one. */
block->bit = 7;
block->byte++;
}
return val;
}
/* Slow path: cases deliberately fall through so that exactly nbits
 * GET_BIT_MSB steps run for nbits in 1..8; larger counts use the loop. */
switch(nbits) {
case 8: GET_BIT_MSB(block, val); case 7: GET_BIT_MSB(block, val); case 6: GET_BIT_MSB(block, val); case 5: GET_BIT_MSB(block, val); case 4: GET_BIT_MSB(block, val); case 3: GET_BIT_MSB(block, val); case 2: GET_BIT_MSB(block, val); case 1: GET_BIT_MSB(block, val);
break;
default:
for (i = 0; i < nbits; i++)
GET_BIT_MSB(block, val);
}
#endif
return val;
}
/*
 * Appends the low nbits bits of val to the block, most significant bit
 * first, starting at the block's current (byte, bit) position.
 *
 * The buffer is grown when fewer than 8 spare bytes follow the current
 * byte.  Growth goes through a temporary pointer so that a failed realloc
 * leaves block->data and block->alloc intact instead of dropping the old
 * buffer.
 *
 * NOTE(review): bits of val above nbits are assumed to be zero, since they
 * are OR-ed into the output unmasked -- confirm with callers.
 *
 * Returns 0 on success,
 *        -1 on allocation failure.
 */
static int store_bits_MSB(cram_block *block, uint64_t val, int nbits) {
    uint64_t mask;

    if (block->byte+8 >= block->alloc) {
        const int first_use = (block->byte == 0);
        size_t new_alloc = first_use ? 1024 : block->alloc * 2;
        void *grown = realloc(block->data, new_alloc + 8);

        if (!grown)
            return -1;

        block->data  = grown;
        block->alloc = new_alloc;
        if (first_use)
            block->data[0] = 0;  /* start the bit stream from a clean byte */
    }

    /* Everything fits in what is left of the current byte. */
    if (nbits <= block->bit+1) {
        block->data[block->byte] |= (val << (block->bit+1-nbits));
        if ((block->bit-=nbits) == -1) {
            block->bit = 7;
            block->byte++;
            block->data[block->byte] = 0;
        }
        return 0;
    }

    /* Fill the remainder of the current byte with the top bits of val. */
    nbits -= block->bit+1;
    block->data[block->byte] |= (val >> nbits);
    block->bit = 7;
    block->byte++;
    block->data[block->byte] = 0;

    /*
     * Emit the remaining bits one at a time.  The mask is 64-bit so that
     * remainders wider than 31 bits are handled without overflowing the
     * shift.
     */
    mask = (uint64_t)1 << (nbits-1);
    do {
        if (val & mask)
            block->data[block->byte] |= (1 << block->bit);
        if (--block->bit == -1) {
            block->bit = 7;
            block->byte++;
            block->data[block->byte] = 0;
        }
        mask >>= 1;
    } while(--nbits);

    return 0;
}
/*
 * Claims the next size bytes of block b, advancing b->idx.
 *
 * Returns a pointer to the start of the claimed region, or
 *         NULL if the advanced index is past b->uncomp_size (the index
 *         stays advanced in that case too).
 */
static char *cram_extract_block(cram_block *b, int size) {
    char *start = (char *)b->data + b->idx;

    b->idx += size;
    if (b->idx <= b->uncomp_size)
        return start;
    return NULL;
}
/*
 * Decodes a single 32-bit integer from the slice block whose id is
 * c->u.external.content_id, using the vv->varint_get32 callback, and
 * stores it in out.  *out_size is set to 1.
 *
 * Returns 0 on success,
 *        -1 if the callback reports an error, or if the block is missing
 *           while *out_size is non-zero.
 */
int cram_external_decode_int(cram_slice *slice, cram_codec *c,
                             cram_block *in, char *out, int *out_size) {
    cram_block *blk = cram_get_block_by_id(slice, c->u.external.content_id);
    if (!blk)
        return *out_size ? -1 : 0;

    char *pos = (char *)blk->data + blk->idx;
    char *limit = (char *)blk->data + blk->uncomp_size;
    int failed = 0;

    *(int32_t *)out = c->vv->varint_get32(&pos, limit, &failed);

    /* Record how far the decoder advanced through the block. */
    blk->idx = pos - (char *)blk->data;
    *out_size = 1;

    if (failed)
        return -1;
    return 0;
}
/*
 * Decodes a single 64-bit integer from the slice block whose id is
 * c->u.external.content_id, using the vv->varint_get64 callback, and
 * stores it in out.  *out_size is set to 1.
 *
 * Returns 0 on success,
 *        -1 if the callback reports an error, or if the block is missing
 *           while *out_size is non-zero.
 */
int cram_external_decode_long(cram_slice *slice, cram_codec *c,
                              cram_block *in, char *out, int *out_size) {
    cram_block *blk = cram_get_block_by_id(slice, c->u.external.content_id);
    if (!blk)
        return *out_size ? -1 : 0;

    char *pos = (char *)blk->data + blk->idx;
    char *limit = (char *)blk->data + blk->uncomp_size;
    int failed = 0;

    *(int64_t *)out = c->vv->varint_get64(&pos, limit, &failed);

    /* Record how far the decoder advanced through the block. */
    blk->idx = pos - (char *)blk->data;
    *out_size = 1;

    if (failed)
        return -1;
    return 0;
}
/*
 * Takes *out_size bytes from the slice block whose id is
 * c->u.external.content_id and, when out is non-NULL, copies them there.
 * With out == NULL the bytes are consumed but not copied.
 *
 * Returns 0 on success,
 *        -1 if the block has too few bytes left, or if the block is
 *           missing while *out_size is non-zero.
 */
int cram_external_decode_char(cram_slice *slice, cram_codec *c,
                              cram_block *in, char *out,
                              int *out_size) {
    cram_block *blk = cram_get_block_by_id(slice, c->u.external.content_id);
    if (!blk)
        return *out_size ? -1 : 0;

    char *src = cram_extract_block(blk, *out_size);
    if (src == NULL)
        return -1;

    if (out != NULL)
        memcpy(out, src, *out_size);

    return 0;
}
/*
 * Takes *out_size bytes from the slice block whose id is
 * c->u.external.content_id and appends them to the cram_block passed
 * (cast to char *) as out_.
 *
 * Returns 0 on success,
 *        -1 if the source block has too few bytes left, if the append
 *           fails, or if the block is missing while *out_size is non-zero.
 */
static int cram_external_decode_block(cram_slice *slice, cram_codec *c,
                                      cram_block *in, char *out_,
                                      int *out_size) {
    cram_block *dest = (cram_block *)out_;
    cram_block *src_blk = cram_get_block_by_id(slice,
                                               c->u.external.content_id);
    char *src;

    if (src_blk == NULL)
        return *out_size ? -1 : 0;

    src = cram_extract_block(src_blk, *out_size);
    if (src == NULL)
        return -1;

    BLOCK_APPEND(dest, src, *out_size);
    return 0;

 block_err:
    return -1;
}
/* Releases an EXTERNAL decoder; a NULL codec is accepted and ignored. */
void cram_external_decode_free(cram_codec *c) {
    free(c);  /* free(NULL) is a no-op */
}
/*
 * Returns the uncomp_size of the slice block whose id is
 * c->u.external.content_id, or -1 if the slice has no such block.
 */
int cram_external_decode_size(cram_slice *slice, cram_codec *c) {
    cram_block *blk = cram_get_block_by_id(slice, c->u.external.content_id);

    if (blk)
        return blk->uncomp_size;
    return -1;
}
/*
 * Looks up the slice block whose id is c->u.external.content_id and
 * returns whatever cram_get_block_by_id() yields for it.
 */
cram_block *cram_external_get_block(cram_slice *slice, cram_codec *c) {
    cram_block *blk = cram_get_block_by_id(slice, c->u.external.content_id);
    return blk;
}
/*
 * Appends a textual description of the EXTERNAL codec to ks.
 *
 * Returns 0 on success, -1 if ksprintf() reports failure.
 */
int cram_external_describe(cram_codec *c, kstring_t *ks) {
    int rc = ksprintf(ks, "EXTERNAL(id=%d)", c->u.external.content_id);

    if (rc < 0)
        return -1;
    return 0;
}
/*
 * Builds an EXTERNAL decoder from its serialised parameters
 * (data[0..size-1]), which must consist of exactly one varint content id.
 *
 * For major version 4 and above only codec E_EXTERNAL with byte-style
 * options (E_BYTE, E_BYTE_ARRAY, E_BYTE_ARRAY_BLOCK) is accepted.  Older
 * versions also map E_INT and E_LONG to integer decoders, and any other
 * option to the block decoder.
 *
 * Returns a newly allocated codec, or
 *         NULL on allocation failure or malformed input (the latter is
 *         logged).
 */
cram_codec *cram_external_decode_init(cram_block_compression_hdr *hdr,
                                      char *data, int size,
                                      enum cram_encoding codec,
                                      enum cram_external_type option,
                                      int version, varint_vec *vv) {
    cram_codec *c = NULL;
    char *cp = data;

    if (size < 1)
        goto malformed;

    c = malloc(sizeof(*c));
    if (c == NULL)
        return NULL;

    c->codec = E_EXTERNAL;

    if (CRAM_MAJOR_VERS(version) < 4) {
        if (option == E_INT)
            c->decode = cram_external_decode_int;
        else if (option == E_LONG)
            c->decode = cram_external_decode_long;
        else if (option == E_BYTE_ARRAY || option == E_BYTE)
            c->decode = cram_external_decode_char;
        else
            c->decode = cram_external_decode_block;
    } else {
        if (codec != E_EXTERNAL)
            goto malformed;

        if (option == E_BYTE_ARRAY_BLOCK)
            c->decode = cram_external_decode_block;
        else if (option == E_BYTE || option == E_BYTE_ARRAY)
            c->decode = cram_external_decode_char;
        else
            goto malformed;
    }

    c->free      = cram_external_decode_free;
    c->size      = cram_external_decode_size;
    c->get_block = cram_external_get_block;
    c->describe  = cram_external_describe;

    c->u.external.content_id = vv->varint_get32(&cp, data+size, NULL);

    /* The content id must account for every byte of the parameters. */
    if (cp - data != size)
        goto malformed;

    c->u.external.type = option;
    return c;

 malformed:
    hts_log_error("Malformed external header stream");
    free(c);
    return NULL;
}
/*
 * Writes the uint32_t at in to c->out through vv->varint_put32_blk.
 *
 * Returns 0 on success, -1 if the callback returns a negative value.
 */
int cram_external_encode_int(cram_slice *slice, cram_codec *c,
                             char *in, int in_size) {
    uint32_t value = *(uint32_t *)in;

    if (c->vv->varint_put32_blk(c->out, value) < 0)
        return -1;
    return 0;
}
/*
 * Writes the int32_t at in to c->out through vv->varint_put32s_blk.
 *
 * Returns 0 on success, -1 if the callback returns a negative value.
 */
int cram_external_encode_sint(cram_slice *slice, cram_codec *c,
                              char *in, int in_size) {
    int32_t value = *(int32_t *)in;

    if (c->vv->varint_put32s_blk(c->out, value) < 0)
        return -1;
    return 0;
}
/*
 * Writes the uint64_t at in to c->out through vv->varint_put64_blk.
 *
 * Returns 0 on success, -1 if the callback returns a negative value.
 */
int cram_external_encode_long(cram_slice *slice, cram_codec *c,
                              char *in, int in_size) {
    uint64_t value = *(uint64_t *)in;

    if (c->vv->varint_put64_blk(c->out, value) < 0)
        return -1;
    return 0;
}
/*
 * Writes the int64_t at in to c->out through vv->varint_put64s_blk.
 *
 * Returns 0 on success, -1 if the callback returns a negative value.
 */
int cram_external_encode_slong(cram_slice *slice, cram_codec *c,
                               char *in, int in_size) {
    int64_t value = *(int64_t *)in;

    if (c->vv->varint_put64s_blk(c->out, value) < 0)
        return -1;
    return 0;
}
/*
 * Appends in_size raw bytes from in to c->out.
 *
 * Returns 0 on success, -1 if BLOCK_APPEND jumps to block_err.
 */
int cram_external_encode_char(cram_slice *slice, cram_codec *c,
                              char *in, int in_size) {
    int status = -1;

    BLOCK_APPEND(c->out, in, in_size);
    status = 0;

 block_err:
    return status;
}
/* Releases an EXTERNAL encoder; a NULL codec is accepted and ignored. */
void cram_external_encode_free(cram_codec *c) {
    if (c)
        free(c);
}
/*
 * Serialises the EXTERNAL codec description into block b: the optional
 * prefix string, the codec id, the parameter length, and the parameters
 * themselves (the content id as a varint).
 *
 * Returns the number of bytes accounted for on success,
 *        -1 on failure.
 */
int cram_external_encode_store(cram_codec *c, cram_block *b, char *prefix,
                               int version) {
    char scratch[99];
    char *end = scratch;
    int total = 0, status = 0, wrote;

    if (prefix) {
        size_t plen = strlen(prefix);
        BLOCK_APPEND(b, prefix, plen);
        total += plen;
    }

    /* Encode the parameters first so their length is known. */
    end += c->vv->varint_put32(end, scratch + sizeof(scratch),
                               c->u.e_external.content_id);

    wrote = c->vv->varint_put32_blk(b, c->codec);
    total += wrote;
    status |= wrote;

    wrote = c->vv->varint_put32_blk(b, end - scratch);
    total += wrote;
    status |= wrote;

    BLOCK_APPEND(b, scratch, end - scratch);
    total += end - scratch;

    if (status > 0)
        return total;

 block_err:
    return -1;
}
/*
 * Creates an EXTERNAL encoder.
 *
 * For major version 4 and above only codec E_EXTERNAL with option E_BYTE
 * or E_BYTE_ARRAY is accepted.  Older versions additionally accept E_INT
 * and E_LONG; any other option aborts the process.
 *
 * dat carries the content id, cast to a pointer.
 *
 * Returns a newly allocated codec, or
 *         NULL on allocation failure or an unsupported codec/option
 *         combination.  The partly built codec is freed on those paths so
 *         that it is not leaked.
 */
cram_codec *cram_external_encode_init(cram_stats *st,
                                      enum cram_encoding codec,
                                      enum cram_external_type option,
                                      void *dat,
                                      int version, varint_vec *vv) {
    cram_codec *c;

    c = malloc(sizeof(*c));
    if (!c)
        return NULL;

    c->codec = E_EXTERNAL;
    c->free = cram_external_encode_free;

    if (CRAM_MAJOR_VERS(version) >= 4) {
        switch (codec) {
        case E_EXTERNAL:
            if (option != E_BYTE && option != E_BYTE_ARRAY)
                goto unsupported;
            c->encode = cram_external_encode_char;
            break;
        default:
            goto unsupported;
        }
    } else {
        if (option == E_INT)
            c->encode = cram_external_encode_int;
        else if (option == E_LONG)
            c->encode = cram_external_encode_long;
        else if (option == E_BYTE_ARRAY || option == E_BYTE)
            c->encode = cram_external_encode_char;
        else
            abort();
    }

    c->store = cram_external_encode_store;
    c->flush = NULL;
    c->u.e_external.content_id = (size_t)dat;

    return c;

 unsupported:
    free(c);
    return NULL;
}
/*
 * Decodes one value with vv->varint_get32 from the slice block whose id
 * is c->u.varint.content_id, adds c->u.varint.offset and stores the result
 * in out as an int32_t.  *out_size is set to 1.
 *
 * Returns 0 on success,
 *        -1 if the callback reports an error, or if the block is missing
 *           while *out_size is non-zero.
 */
int cram_varint_decode_int(cram_slice *slice, cram_codec *c,
                           cram_block *in, char *out, int *out_size) {
    cram_block *blk = cram_get_block_by_id(slice, c->u.varint.content_id);
    if (!blk)
        return *out_size ? -1 : 0;

    char *pos = (char *)blk->data + blk->idx;
    char *limit = (char *)blk->data + blk->uncomp_size;
    int failed = 0;

    *(int32_t *)out = c->vv->varint_get32(&pos, limit, &failed)
                    + c->u.varint.offset;

    /* Record how far the decoder advanced through the block. */
    blk->idx = pos - (char *)blk->data;
    *out_size = 1;

    if (failed)
        return -1;
    return 0;
}
/*
 * Decodes one value with vv->varint_get32s from the slice block whose id
 * is c->u.varint.content_id, adds c->u.varint.offset and stores the result
 * in out as an int32_t.  *out_size is set to 1.
 *
 * Returns 0 on success,
 *        -1 if the callback reports an error, or if the block is missing
 *           while *out_size is non-zero.
 */
int cram_varint_decode_sint(cram_slice *slice, cram_codec *c,
                            cram_block *in, char *out, int *out_size) {
    cram_block *blk = cram_get_block_by_id(slice, c->u.varint.content_id);
    if (!blk)
        return *out_size ? -1 : 0;

    char *pos = (char *)blk->data + blk->idx;
    char *limit = (char *)blk->data + blk->uncomp_size;
    int failed = 0;

    *(int32_t *)out = c->vv->varint_get32s(&pos, limit, &failed)
                    + c->u.varint.offset;

    /* Record how far the decoder advanced through the block. */
    blk->idx = pos - (char *)blk->data;
    *out_size = 1;

    if (failed)
        return -1;
    return 0;
}
/*
 * Decodes one value with vv->varint_get64 from the slice block whose id
 * is c->u.varint.content_id, adds c->u.varint.offset and stores the result
 * in out as an int64_t.  *out_size is set to 1.
 *
 * Returns 0 on success,
 *        -1 if the callback reports an error, or if the block is missing
 *           while *out_size is non-zero.
 */
int cram_varint_decode_long(cram_slice *slice, cram_codec *c,
                            cram_block *in, char *out, int *out_size) {
    cram_block *blk = cram_get_block_by_id(slice, c->u.varint.content_id);
    if (!blk)
        return *out_size ? -1 : 0;

    char *pos = (char *)blk->data + blk->idx;
    char *limit = (char *)blk->data + blk->uncomp_size;
    int failed = 0;

    *(int64_t *)out = c->vv->varint_get64(&pos, limit, &failed)
                    + c->u.varint.offset;

    /* Record how far the decoder advanced through the block. */
    blk->idx = pos - (char *)blk->data;
    *out_size = 1;

    if (failed)
        return -1;
    return 0;
}
/*
 * Decodes one value with vv->varint_get64s from the slice block whose id
 * is c->u.varint.content_id, adds c->u.varint.offset and stores the result
 * in out as an int64_t.  *out_size is set to 1.
 *
 * Returns 0 on success,
 *        -1 if the callback reports an error, or if the block is missing
 *           while *out_size is non-zero.
 */
int cram_varint_decode_slong(cram_slice *slice, cram_codec *c,
                             cram_block *in, char *out, int *out_size) {
    cram_block *blk = cram_get_block_by_id(slice, c->u.varint.content_id);
    if (!blk)
        return *out_size ? -1 : 0;

    char *pos = (char *)blk->data + blk->idx;
    char *limit = (char *)blk->data + blk->uncomp_size;
    int failed = 0;

    *(int64_t *)out = c->vv->varint_get64s(&pos, limit, &failed)
                    + c->u.varint.offset;

    /* Record how far the decoder advanced through the block. */
    blk->idx = pos - (char *)blk->data;
    *out_size = 1;

    if (failed)
        return -1;
    return 0;
}
/* Releases a VARINT decoder; a NULL codec is accepted and ignored. */
void cram_varint_decode_free(cram_codec *c) {
    free(c);  /* free(NULL) is a no-op */
}
/*
 * Returns the uncomp_size of the slice block whose id is
 * c->u.varint.content_id, or -1 if the slice has no such block.
 */
int cram_varint_decode_size(cram_slice *slice, cram_codec *c) {
    cram_block *blk = cram_get_block_by_id(slice, c->u.varint.content_id);

    if (blk)
        return blk->uncomp_size;
    return -1;
}
/*
 * Looks up the slice block whose id is c->u.varint.content_id and returns
 * whatever cram_get_block_by_id() yields for it.
 */
cram_block *cram_varint_get_block(cram_slice *slice, cram_codec *c) {
    cram_block *blk = cram_get_block_by_id(slice, c->u.varint.content_id);
    return blk;
}
/*
 * Appends a textual description of the VARINT codec (content id, offset
 * and type) to ks.
 *
 * Returns 0 on success, -1 if ksprintf() reports failure.
 */
int cram_varint_describe(cram_codec *c, kstring_t *ks) {
    int rc = ksprintf(ks, "VARINT(id=%d,offset=%"PRId64",type=%d)",
                      c->u.varint.content_id,
                      c->u.varint.offset,
                      c->u.varint.type);

    if (rc < 0)
        return -1;
    return 0;
}
/*
 * Builds a VARINT decoder from its serialised parameters
 * (data[0..size-1]): a varint content id followed by a signed varint
 * offset, which together must use exactly size bytes.
 *
 * codec chooses between the unsigned and signed decoders, and option
 * chooses the 32-bit variant (E_INT) or the 64-bit one (anything else).
 *
 * Returns a newly allocated codec, or
 *         NULL on allocation failure, an unsupported codec, or malformed
 *         parameters.  The codec is freed on every failure path; the
 *         unsupported-codec path used to leak it.
 */
cram_codec *cram_varint_decode_init(cram_block_compression_hdr *hdr,
                                    char *data, int size,
                                    enum cram_encoding codec,
                                    enum cram_external_type option,
                                    int version, varint_vec *vv) {
    cram_codec *c;
    char *cp = data, *cp_end = data+size;

    if (!(c = malloc(sizeof(*c))))
        return NULL;

    c->codec = codec;

    switch(codec) {
    case E_VARINT_UNSIGNED:
        c->decode = (option == E_INT)
            ? cram_varint_decode_int
            : cram_varint_decode_long;
        break;
    case E_VARINT_SIGNED:
        c->decode = (option == E_INT)
            ? cram_varint_decode_sint
            : cram_varint_decode_slong;
        break;
    default:
        free(c);
        return NULL;
    }

    c->free = cram_varint_decode_free;
    c->size = cram_varint_decode_size;
    c->get_block = cram_varint_get_block;
    c->describe = cram_varint_describe;

    c->u.varint.content_id = vv->varint_get32 (&cp, cp_end, NULL);
    c->u.varint.offset     = vv->varint_get64s(&cp, cp_end, NULL);

    /* Both fields together must account for every parameter byte. */
    if (cp - data != size) {
        fprintf(stderr, "Malformed varint header stream\n");
        free(c);
        return NULL;
    }

    c->u.varint.type = option;

    return c;
}
/*
 * Subtracts c->u.varint.offset from the uint32_t at in and writes the
 * result to c->out through vv->varint_put32_blk.
 *
 * Returns 0 on success, -1 if the callback returns a negative value.
 */
int cram_varint_encode_int(cram_slice *slice, cram_codec *c,
                           char *in, int in_size) {
    if (c->vv->varint_put32_blk(c->out,
                                *(uint32_t *)in - c->u.varint.offset) < 0)
        return -1;
    return 0;
}
/*
 * Subtracts c->u.varint.offset from the int32_t at in and writes the
 * result to c->out through vv->varint_put32s_blk.
 *
 * Returns 0 on success, -1 if the callback returns a negative value.
 */
int cram_varint_encode_sint(cram_slice *slice, cram_codec *c,
                            char *in, int in_size) {
    if (c->vv->varint_put32s_blk(c->out,
                                 *(int32_t *)in - c->u.varint.offset) < 0)
        return -1;
    return 0;
}
/*
 * Subtracts c->u.varint.offset from the uint64_t at in and writes the
 * result to c->out through vv->varint_put64_blk.
 *
 * Returns 0 on success, -1 if the callback returns a negative value.
 */
int cram_varint_encode_long(cram_slice *slice, cram_codec *c,
                            char *in, int in_size) {
    if (c->vv->varint_put64_blk(c->out,
                                *(uint64_t *)in - c->u.varint.offset) < 0)
        return -1;
    return 0;
}
/*
 * Subtracts c->u.varint.offset from the int64_t at in and writes the
 * result to c->out through vv->varint_put64s_blk.
 *
 * Returns 0 on success, -1 if the callback returns a negative value.
 */
int cram_varint_encode_slong(cram_slice *slice, cram_codec *c,
                             char *in, int in_size) {
    if (c->vv->varint_put64s_blk(c->out,
                                 *(int64_t *)in - c->u.varint.offset) < 0)
        return -1;
    return 0;
}
/* Releases a VARINT encoder; a NULL codec is accepted and ignored. */
void cram_varint_encode_free(cram_codec *c) {
    if (c)
        free(c);
}
/*
 * Serialises the VARINT codec description into block b: the optional
 * prefix string, the codec id, the parameter length, and the parameters
 * (content id, then signed offset).
 *
 * Returns the number of bytes accounted for, or
 *        -1 if BLOCK_APPEND jumps to block_err.
 */
int cram_varint_encode_store(cram_codec *c, cram_block *b, char *prefix,
                             int version) {
    char scratch[99];
    char *end = scratch;
    int total = 0;

    if (prefix) {
        size_t plen = strlen(prefix);
        BLOCK_APPEND(b, prefix, plen);
        total += plen;
    }

    /* Encode the parameters first so their length is known. */
    end += c->vv->varint_put32 (end, NULL, c->u.e_varint.content_id);
    end += c->vv->varint_put64s(end, NULL, c->u.e_varint.offset);

    total += c->vv->varint_put32_blk(b, c->codec);
    total += c->vv->varint_put32_blk(b, end - scratch);

    BLOCK_APPEND(b, scratch, end - scratch);
    total += end - scratch;

    return total;

 block_err:
    return -1;
}
/*
 * Creates a VARINT encoder.
 *
 * When statistics are supplied, an offset may be derived from st->min_val,
 * and for a small negative minimum with a much larger maximum the codec is
 * switched to E_VARINT_UNSIGNED.  option chooses the 32-bit encoder
 * (E_INT) or the 64-bit one (anything else).  dat carries the content id,
 * cast to a pointer.
 *
 * Returns a newly allocated codec, or
 *         NULL on allocation failure or an unsupported codec.  The codec
 *         is freed on the unsupported path, which used to leak it.
 */
cram_codec *cram_varint_encode_init(cram_stats *st,
                                    enum cram_encoding codec,
                                    enum cram_external_type option,
                                    void *dat,
                                    int version, varint_vec *vv) {
    cram_codec *c;

    if (!(c = malloc(sizeof(*c))))
        return NULL;

    c->u.e_varint.offset = 0;
    if (st) {
        if (st->min_val < 0 && st->min_val >= -127
            && st->max_val / -st->min_val > 100) {
            c->u.e_varint.offset = -st->min_val;
            codec = E_VARINT_UNSIGNED;
        } else if (st->min_val > 0) {
            /*
             * NOTE(review): the encoders write (value - offset), so a
             * negated minimum makes stored values larger, not smaller.
             * Decoding is symmetric and round-trips either way; confirm
             * the intended sign before changing it.
             */
            c->u.e_varint.offset = -st->min_val;
        }
    }

    c->codec = codec;
    c->free = cram_varint_encode_free;

    switch (codec) {
    case E_VARINT_UNSIGNED:
        c->encode = (option == E_INT)
            ? cram_varint_encode_int
            : cram_varint_encode_long;
        break;
    case E_VARINT_SIGNED:
        c->encode = (option == E_INT)
            ? cram_varint_encode_sint
            : cram_varint_encode_slong;
        break;
    default:
        free(c);
        return NULL;
    }

    c->store = cram_varint_encode_store;
    c->flush = NULL;
    c->u.e_varint.content_id = (size_t)dat;

    return c;
}
/*
 * Fills out[0..*out_size-1] with the codec's constant value.
 * Always returns 0.
 */
int cram_const_decode_byte(cram_slice *slice, cram_codec *c,
                           cram_block *in, char *out, int *out_size) {
    const int count = *out_size;
    int k = 0;

    while (k < count) {
        out[k] = c->u.xconst.val;
        k++;
    }
    return 0;
}
/*
 * Treats out as an int32_t array and fills its first *out_size elements
 * with the codec's constant value.  Always returns 0.
 */
int cram_const_decode_int(cram_slice *slice, cram_codec *c,
                          cram_block *in, char *out, int *out_size) {
    int32_t *dst = (int32_t *)out;
    const int count = *out_size;
    int k = 0;

    while (k < count) {
        dst[k] = c->u.xconst.val;
        k++;
    }
    return 0;
}
/*
 * Treats out as an int64_t array and fills its first *out_size elements
 * with the codec's constant value.  Always returns 0.
 */
int cram_const_decode_long(cram_slice *slice, cram_codec *c,
                           cram_block *in, char *out, int *out_size) {
    int64_t *dst = (int64_t *)out;
    const int count = *out_size;
    int k = 0;

    while (k < count) {
        dst[k] = c->u.xconst.val;
        k++;
    }
    return 0;
}
/* Releases a CONST codec; a NULL codec is accepted and ignored. */
void cram_const_decode_free(cram_codec *c) {
    free(c);  /* free(NULL) is a no-op */
}
/* Size query for the CONST codec: always reports 0. */
int cram_const_decode_size(cram_slice *slice, cram_codec *c) {
    (void)slice;
    (void)c;
    return 0;
}
/*
 * Appends a textual description of the CONST codec to ks.
 *
 * Returns 0 on success, -1 if ksprintf() reports failure.
 */
int cram_const_describe(cram_codec *c, kstring_t *ks) {
    int rc = ksprintf(ks, "CONST(val=%"PRId64")", c->u.xconst.val);

    if (rc < 0)
        return -1;
    return 0;
}
/*
 * Builds a CONST decoder from its serialised parameters
 * (data[0..size-1]), which must consist of exactly one signed varint
 * holding the constant.
 *
 * Codec E_CONST_BYTE selects the byte decoder; otherwise option E_INT
 * selects the 32-bit decoder and anything else the 64-bit one.
 *
 * Returns a newly allocated codec, or
 *         NULL on allocation failure or malformed parameters.
 */
cram_codec *cram_const_decode_init(cram_block_compression_hdr *hdr,
                                   char *data, int size,
                                   enum cram_encoding codec,
                                   enum cram_external_type option,
                                   int version, varint_vec *vv) {
    char *cp = data;
    cram_codec *c = malloc(sizeof(*c));

    if (c == NULL)
        return NULL;

    c->codec = codec;

    if (codec == E_CONST_BYTE) {
        c->decode = cram_const_decode_byte;
    } else if (option == E_INT) {
        c->decode = cram_const_decode_int;
    } else {
        c->decode = cram_const_decode_long;
    }

    c->free      = cram_const_decode_free;
    c->size      = cram_const_decode_size;
    c->get_block = NULL;
    c->describe  = cram_const_describe;

    c->u.xconst.val = vv->varint_get64s(&cp, data+size, NULL);

    /* Anything other than exactly one varint is a malformed header. */
    if (cp - data == size)
        return c;

    fprintf(stderr, "Malformed const header stream\n");
    free(c);
    return NULL;
}
/*
 * Encode callback for the CONST codec.  It writes nothing and always
 * returns 0.
 */
int cram_const_encode(cram_slice *slice, cram_codec *c,
                      char *in, int in_size) {
    (void)slice;
    (void)c;
    (void)in;
    (void)in_size;
    return 0;
}
/*
 * Serialises the CONST codec description into block b: the optional
 * prefix string, the codec id, the parameter length, and the constant as
 * a signed varint.
 *
 * Returns the number of bytes accounted for, or
 *        -1 if BLOCK_APPEND jumps to block_err.
 */
int cram_const_encode_store(cram_codec *c, cram_block *b, char *prefix,
                            int version) {
    char scratch[99];
    char *end = scratch;
    int total = 0;

    if (prefix) {
        size_t plen = strlen(prefix);
        BLOCK_APPEND(b, prefix, plen);
        total += plen;
    }

    /* Encode the constant first so its length is known. */
    end += c->vv->varint_put64s(end, NULL, c->u.xconst.val);

    total += c->vv->varint_put32_blk(b, c->codec);
    total += c->vv->varint_put32_blk(b, end - scratch);

    BLOCK_APPEND(b, scratch, end - scratch);
    total += end - scratch;

    return total;

 block_err:
    return -1;
}
/*
 * Creates a CONST encoder whose constant is st->min_val.
 *
 * Returns a newly allocated codec, or NULL on allocation failure.
 */
cram_codec *cram_const_encode_init(cram_stats *st,
                                   enum cram_encoding codec,
                                   enum cram_external_type option,
                                   void *dat,
                                   int version, varint_vec *vv) {
    cram_codec *c = malloc(sizeof(*c));

    if (c == NULL)
        return NULL;

    c->codec  = codec;
    c->free   = cram_const_decode_free;  /* the decoder's free is reused */
    c->encode = cram_const_encode;
    c->store  = cram_const_encode_store;
    c->flush  = NULL;

    c->u.e_xconst.val = st->min_val;

    return c;
}
/*
 * Decodes *out_size fixed-width values from the bit stream in into out,
 * treated as an int64_t array.  Each value is the nbits-wide field minus
 * the codec offset; with nbits == 0 every value is just -offset.
 *
 * Returns 0 on success,
 *        -1 if cram_not_enough_bits() rejects the request.
 */
int cram_beta_decode_long(cram_slice *slice, cram_codec *c, cram_block *in, char *out, int *out_size) {
    int64_t *dst = (int64_t *)out;
    const int count = *out_size;
    int k;

    if (!c->u.beta.nbits) {
        for (k = 0; k < count; k++)
            dst[k] = -c->u.beta.offset;
        return 0;
    }

    if (cram_not_enough_bits(in, c->u.beta.nbits * count))
        return -1;

    for (k = 0; k < count; k++)
        dst[k] = get_bits_MSB(in, c->u.beta.nbits) - c->u.beta.offset;

    return 0;
}
/*
 * Decodes *out_size fixed-width values from the bit stream in into out,
 * treated as an int32_t array.  Each value is the nbits-wide field minus
 * the codec offset; with nbits == 0 every value is just -offset.
 *
 * Returns 0 on success,
 *        -1 if cram_not_enough_bits() rejects the request.
 */
int cram_beta_decode_int(cram_slice *slice, cram_codec *c, cram_block *in, char *out, int *out_size) {
    int32_t *dst = (int32_t *)out;
    const int count = *out_size;
    int k;

    if (!c->u.beta.nbits) {
        for (k = 0; k < count; k++)
            dst[k] = -c->u.beta.offset;
        return 0;
    }

    if (cram_not_enough_bits(in, c->u.beta.nbits * count))
        return -1;

    for (k = 0; k < count; k++)
        dst[k] = get_bits_MSB(in, c->u.beta.nbits) - c->u.beta.offset;

    return 0;
}
/*
 * Decodes *out_size fixed-width values from the bit stream in into the
 * byte array out.  With out == NULL the bits are still consumed but
 * nothing is stored.  With nbits == 0 every value is just -offset and no
 * bits are read.
 *
 * Returns 0 on success,
 *        -1 if cram_not_enough_bits() rejects the request.
 */
int cram_beta_decode_char(cram_slice *slice, cram_codec *c, cram_block *in, char *out, int *out_size) {
    const int count = *out_size;
    int k;

    if (!c->u.beta.nbits) {
        if (out) {
            for (k = 0; k < count; k++)
                out[k] = -c->u.beta.offset;
        }
        return 0;
    }

    if (cram_not_enough_bits(in, c->u.beta.nbits * count))
        return -1;

    if (out) {
        for (k = 0; k < count; k++)
            out[k] = get_bits_MSB(in, c->u.beta.nbits) - c->u.beta.offset;
    } else {
        /* Skip mode: advance the bit position without storing anything. */
        for (k = 0; k < count; k++)
            get_bits_MSB(in, c->u.beta.nbits);
    }

    return 0;
}
/* Releases a BETA decoder; a NULL codec is accepted and ignored. */
void cram_beta_decode_free(cram_codec *c) {
    free(c);  /* free(NULL) is a no-op */
}
/*
 * Appends a textual description of the BETA codec (offset and bit width)
 * to ks.
 *
 * Returns 0 on success, -1 if ksprintf() reports failure.
 */
int cram_beta_describe(cram_codec *c, kstring_t *ks) {
    int rc = ksprintf(ks, "BETA(offset=%d, nbits=%d)",
                      c->u.beta.offset, c->u.beta.nbits);

    if (rc < 0)
        return -1;
    return 0;
}
/*
 * Builds a BETA decoder from its serialised parameters
 * (data[0..size-1]): a varint offset followed by a varint bit width,
 * which together must use exactly size bytes.  The bit width must lie in
 * 0 .. 8*sizeof(int).
 *
 * option selects the 32-bit, 64-bit or byte decoder; any other option is
 * rejected.
 *
 * Returns a newly allocated codec, or
 *         NULL on allocation failure, an unsupported option, or malformed
 *         parameters (the last two are logged).
 */
cram_codec *cram_beta_decode_init(cram_block_compression_hdr *hdr,
                                  char *data, int size,
                                  enum cram_encoding codec,
                                  enum cram_external_type option,
                                  int version, varint_vec *vv) {
    char *cp = data;
    char *limit = data + size;
    cram_codec *c = malloc(sizeof(*c));

    if (c == NULL)
        return NULL;

    c->codec = E_BETA;

    if (option == E_INT || option == E_SINT) {
        c->decode = cram_beta_decode_int;
    } else if (option == E_LONG || option == E_SLONG) {
        c->decode = cram_beta_decode_long;
    } else if (option == E_BYTE_ARRAY || option == E_BYTE) {
        c->decode = cram_beta_decode_char;
    } else {
        hts_log_error("BYTE_ARRAYs not supported by this codec");
        free(c);
        return NULL;
    }

    c->free     = cram_beta_decode_free;
    c->describe = cram_beta_describe;

    /* nbits stays at -1, and so fails validation, if the field is absent. */
    c->u.beta.nbits  = -1;
    c->u.beta.offset = vv->varint_get32(&cp, limit, NULL);
    if (cp < limit)
        c->u.beta.nbits = vv->varint_get32(&cp, limit, NULL);

    if (cp - data != size
        || c->u.beta.nbits < 0 || c->u.beta.nbits > 8 * sizeof(int)) {
        hts_log_error("Malformed beta header stream");
        free(c);
        return NULL;
    }

    return c;
}
/*
 * Serialises the BETA codec description into block b: the optional prefix
 * string, the codec id, the combined size of the two parameters, then the
 * offset and the bit width.
 *
 * Returns the number of bytes accounted for on success,
 *        -1 on failure.
 */
int cram_beta_encode_store(cram_codec *c, cram_block *b,
                           char *prefix, int version) {
    int total = 0, status = 0, wrote;

    if (prefix) {
        size_t plen = strlen(prefix);
        BLOCK_APPEND(b, prefix, plen);
        total += plen;
    }

    wrote = c->vv->varint_put32_blk(b, c->codec);
    total += wrote;
    status |= wrote;

    /* Length of the parameter section that follows. */
    wrote = c->vv->varint_put32_blk(b,
                                    c->vv->varint_size(c->u.e_beta.offset)
                                    + c->vv->varint_size(c->u.e_beta.nbits));
    total += wrote;
    status |= wrote;

    wrote = c->vv->varint_put32_blk(b, c->u.e_beta.offset);
    total += wrote;
    status |= wrote;

    wrote = c->vv->varint_put32_blk(b, c->u.e_beta.nbits);
    total += wrote;
    status |= wrote;

    if (status > 0)
        return total;

 block_err:
    return -1;
}
/*
 * Writes in_size int64_t values from in to c->out, each as
 * (value + offset) in a field of the codec's bit width.
 *
 * Returns the bitwise OR of the store_bits_MSB() results, which is 0 when
 * every store succeeded.
 */
int cram_beta_encode_long(cram_slice *slice, cram_codec *c,
                          char *in, int in_size) {
    const int64_t *values = (int64_t *)in;
    int status = 0;
    int k = 0;

    while (k < in_size) {
        status |= store_bits_MSB(c->out, values[k] + c->u.e_beta.offset,
                                 c->u.e_beta.nbits);
        k++;
    }

    return status;
}
/*
 * Writes in_size int values from in to c->out, each as (value + offset)
 * in a field of the codec's bit width.
 *
 * Returns the bitwise OR of the store_bits_MSB() results, which is 0 when
 * every store succeeded.
 */
int cram_beta_encode_int(cram_slice *slice, cram_codec *c,
                         char *in, int in_size) {
    const int *values = (int *)in;
    int status = 0;
    int k = 0;

    while (k < in_size) {
        status |= store_bits_MSB(c->out, values[k] + c->u.e_beta.offset,
                                 c->u.e_beta.nbits);
        k++;
    }

    return status;
}
/*
 * Writes in_size bytes from in (read as unsigned char) to c->out, each as
 * (value + offset) in a field of the codec's bit width.
 *
 * Returns the bitwise OR of the store_bits_MSB() results, which is 0 when
 * every store succeeded.
 */
int cram_beta_encode_char(cram_slice *slice, cram_codec *c,
                          char *in, int in_size) {
    const unsigned char *values = (unsigned char *)in;
    int status = 0;
    int k = 0;

    while (k < in_size) {
        status |= store_bits_MSB(c->out, values[k] + c->u.e_beta.offset,
                                 c->u.e_beta.nbits);
        k++;
    }

    return status;
}
/* Releases a BETA encoder; a NULL codec is accepted and ignored. */
void cram_beta_encode_free(cram_codec *c) {
    free(c);  /* free(NULL) is a no-op */
}
/*
 * Creates a BETA encoder.
 *
 * The value range comes from dat when it is non-NULL (read as two
 * hts_pos_t values: minimum, then maximum).  Otherwise it is derived from
 * the statistics: the non-zero entries of st->freqs[] and, when present,
 * the keys of the st->h hash.
 *
 * The offset is set to -minimum and the bit width to the number of bits
 * needed to hold (maximum - minimum).
 *
 * Returns a newly allocated codec, or
 *         NULL on allocation failure, an empty range, or a range that
 *         fails the checks for E_SINT / E_INT.
 */
cram_codec *cram_beta_encode_init(cram_stats *st,
                                  enum cram_encoding codec,
                                  enum cram_external_type option,
                                  void *dat,
                                  int version, varint_vec *vv) {
    hts_pos_t lo, hi;
    int64_t span;
    int width = 0;
    cram_codec *c = malloc(sizeof(*c));

    if (c == NULL)
        return NULL;

    c->codec = E_BETA;
    c->free  = cram_beta_encode_free;

    if (option == E_INT || option == E_SINT) {
        c->encode = cram_beta_encode_int;
    } else if (option == E_LONG || option == E_SLONG) {
        c->encode = cram_beta_encode_long;
    } else {
        c->encode = cram_beta_encode_char;
    }

    c->store = cram_beta_encode_store;
    c->flush = NULL;

    if (dat) {
        hts_pos_t *bounds = (hts_pos_t *)dat;
        lo = bounds[0];
        hi = bounds[1];
    } else {
        int v;

        lo = INT_MAX;
        hi = INT_MIN;

        /* Values tracked in the frequency array, visited in ascending order. */
        for (v = 0; v < MAX_STAT_VAL; v++) {
            if (st->freqs[v]) {
                if (lo > v)
                    lo = v;
                hi = v;
            }
        }

        /* Values tracked in the hash, in no particular order. */
        if (st->h) {
            khint_t k;

            for (k = kh_begin(st->h); k != kh_end(st->h); k++) {
                if (kh_exist(st->h, k)) {
                    v = kh_key(st->h, k);
                    if (lo > v)
                        lo = v;
                    if (hi < v)
                        hi = v;
                }
            }
        }
    }

    if (hi < lo)
        goto err;

    span = (int64_t) hi - lo;

    if (option == E_SINT) {
        if (lo < INT_MIN || span > INT_MAX)
            goto err;
    } else if (option == E_INT) {
        if (hi > UINT_MAX || span > UINT_MAX)
            goto err;
    }

    c->u.e_beta.offset = -lo;

    /* Count the bits needed to represent the span. */
    for (; span; span >>= 1)
        width++;
    c->u.e_beta.nbits = width;

    return c;

 err:
    free(c);
    return NULL;
}
/*
 * Decodes *out_size packed symbols from the bit stream in into out,
 * treated as an int64_t array.  Each nbits-wide field is translated
 * through the codec's rmap table; with nbits == 0 every value is rmap[0].
 *
 * The input is checked with cram_not_enough_bits() before any bits are
 * read, matching cram_xpack_decode_int().
 *
 * Returns 0 on success,
 *        -1 if cram_not_enough_bits() rejects the request.
 */
int cram_xpack_decode_long(cram_slice *slice, cram_codec *c, cram_block *in, char *out, int *out_size) {
    int64_t *out_i = (int64_t *)out;
    int i, n = *out_size;

    if (c->u.xpack.nbits) {
        if (cram_not_enough_bits(in, c->u.xpack.nbits * n))
            return -1;

        for (i = 0; i < n; i++)
            out_i[i] = c->u.xpack.rmap[get_bits_MSB(in, c->u.xpack.nbits)];
    } else {
        for (i = 0; i < n; i++)
            out_i[i] = c->u.xpack.rmap[0];
    }

    return 0;
}
/*
 * Decodes *out_size packed symbols from the bit stream in into out,
 * treated as an int32_t array.  Each nbits-wide field is translated
 * through the codec's rmap table; with nbits == 0 every value is rmap[0].
 *
 * Returns 0 on success,
 *        -1 if cram_not_enough_bits() rejects the request.
 */
int cram_xpack_decode_int(cram_slice *slice, cram_codec *c, cram_block *in, char *out, int *out_size) {
    int32_t *dst = (int32_t *)out;
    const int count = *out_size;
    int k;

    if (!c->u.xpack.nbits) {
        for (k = 0; k < count; k++)
            dst[k] = c->u.xpack.rmap[0];
        return 0;
    }

    if (cram_not_enough_bits(in, c->u.xpack.nbits * count))
        return -1;

    for (k = 0; k < count; k++)
        dst[k] = c->u.xpack.rmap[get_bits_MSB(in, c->u.xpack.nbits)];

    return 0;
}
/*
 * Makes sure the unpacked form of this codec's data exists in
 * slice->block_by_id[512 + c->codec_id].
 *
 * If that slot is already filled, nothing is done.  Otherwise the
 * sub-codec's block is fetched, a new block is created and stored in the
 * slot, and hts_unpack() expands the packed data into it using the
 * codec's rmap table.
 *
 * Returns 0 on success (or when the block already exists),
 *        -1 if the sub-codec has no block, or on allocation failure.
 */
static int cram_xpack_decode_expand_char(cram_slice *slice, cram_codec *c) {
cram_block *b = slice->block_by_id[512 + c->codec_id];
/* Already expanded for this slice; reuse it. */
if (b)
return 0;
cram_block *sub_b = c->u.xpack.sub_codec->get_block(slice, c->u.xpack.sub_codec);
if (!sub_b)
return -1;
b = slice->block_by_id[512 + c->codec_id] = cram_new_block(0, 0);
if (!b)
return -1;
/* Each packed byte holds 8/nbits symbols.
 * NOTE(review): nbits == 0 would divide by zero here -- confirm callers
 * only reach this with nbits > 0. */
int n = sub_b->uncomp_size * 8/c->u.xpack.nbits;
/* BLOCK_GROW presumably jumps to block_err on allocation failure -- TODO confirm. */
BLOCK_GROW(b, n);
b->uncomp_size = n;
/* Copy the reverse map into the byte table that hts_unpack() is given. */
uint8_t p[256];
int z;
for (z = 0; z < 256; z++)
p[z] = c->u.xpack.rmap[z];
/* NOTE(review): the return value of hts_unpack() is not checked. */
hts_unpack(sub_b->data, sub_b->uncomp_size, b->data, b->uncomp_size,
8 / c->u.xpack.nbits, p);
return 0;
block_err:
return -1;
}
/*
 * Produces *out_size decoded bytes.
 *
 * With more than one mapped value, the data is expanded once per slice
 * (see cram_xpack_decode_expand_char) and bytes are then served from the
 * expanded block, advancing its byte cursor.  With out == NULL the bytes
 * are skipped.  With at most one mapped value every output byte is
 * rmap[0].
 *
 * The request is checked against the expanded block's size so that a
 * short or corrupt block cannot be read past its end, and out is checked
 * for NULL in the single-value case too.
 *
 * Returns 0 on success,
 *        -1 if the expanded block is unavailable or has too few bytes
 *           left.
 */
int cram_xpack_decode_char(cram_slice *slice, cram_codec *c, cram_block *in, char *out, int *out_size) {
    if (c->u.xpack.nval > 1) {
        cram_xpack_decode_expand_char(slice, c);
        cram_block *b = slice->block_by_id[512 + c->codec_id];
        if (!b)
            return -1;

        /* Refuse to read beyond the expanded data. */
        if (*out_size < 0 || b->byte + *out_size > b->uncomp_size)
            return -1;

        if (out)
            memcpy(out, b->data + b->byte, *out_size);
        b->byte += *out_size;
    } else if (out) {
        memset(out, c->u.xpack.rmap[0], *out_size);
    }

    return 0;
}
/*
 * Releases an XPACK decoder together with its sub-codec, if it has one.
 * A NULL codec is accepted and ignored.
 */
void cram_xpack_decode_free(cram_codec *c) {
    if (c) {
        if (c->u.xpack.sub_codec)
            c->u.xpack.sub_codec->free(c->u.xpack.sub_codec);
        free(c);
    }
}
/*
 * Returns the size of the expanded XPACK data for this slice, expanding
 * it first if needed, or
 *        -1 if the expanded block is unavailable.  A failed expansion
 *           used to lead to a NULL dereference here.
 */
int cram_xpack_decode_size(cram_slice *slice, cram_codec *c) {
    cram_xpack_decode_expand_char(slice, c);

    cram_block *b = slice->block_by_id[512 + c->codec_id];
    if (!b)
        return -1;

    return b->uncomp_size;
}
/*
 * Returns the expanded XPACK block for this slice, expanding it first if
 * needed.  The result is whatever slice->block_by_id[512 + codec_id]
 * holds after the attempt.
 */
cram_block *cram_xpack_get_block(cram_slice *slice, cram_codec *c) {
    cram_block *expanded;

    cram_xpack_decode_expand_char(slice, c);
    expanded = slice->block_by_id[512 + c->codec_id];

    return expanded;
}
/*
 * Builds an XPACK decoder from its serialised parameters
 * (data[0..size-1]): the bit width, the number of mapped values, that
 * many map entries (each below 256), then the sub-codec's encoding id,
 * parameter size and parameters.  Together they must use exactly size
 * bytes.
 *
 * option selects the 64-bit, 32-bit or byte decoder; any other option is
 * rejected.
 *
 * Returns a newly allocated codec, or
 *         NULL on allocation failure, an unsupported option, or malformed
 *         parameters (a message goes to stderr for the last two).
 */
cram_codec *cram_xpack_decode_init(cram_block_compression_hdr *hdr,
                                   char *data, int size,
                                   enum cram_encoding codec,
                                   enum cram_external_type option,
                                   int version, varint_vec *vv) {
    char *cp = data;
    char *endp = data+size;
    int i, encoding, sub_size;
    cram_codec *c = calloc(1, sizeof(*c));

    if (c == NULL)
        return NULL;

    c->codec = E_XPACK;

    if (option == E_LONG) {
        c->decode = cram_xpack_decode_long;
    } else if (option == E_INT) {
        c->decode = cram_xpack_decode_int;
    } else if (option == E_BYTE_ARRAY || option == E_BYTE) {
        c->decode = cram_xpack_decode_char;
    } else {
        fprintf(stderr, "BYTE_ARRAYs not supported by this codec\n");
        goto malformed;
    }

    c->free      = cram_xpack_decode_free;
    c->size      = cram_xpack_decode_size;
    c->get_block = cram_xpack_get_block;
    c->describe  = NULL;

    c->u.xpack.nbits = vv->varint_get32(&cp, endp, NULL);
    c->u.xpack.nval  = vv->varint_get32(&cp, endp, NULL);
    if (c->u.xpack.nbits >= 8  || c->u.xpack.nbits < 0 ||
        c->u.xpack.nval  > 256 || c->u.xpack.nval < 0)
        goto malformed;

    /* Read the reverse map: packed code -> original byte value. */
    for (i = 0; i < c->u.xpack.nval; i++) {
        uint32_t v = vv->varint_get32(&cp, endp, NULL);
        if (v >= 256)
            goto malformed;
        c->u.xpack.rmap[i] = v;
    }

    encoding = vv->varint_get32(&cp, endp, NULL);
    sub_size = vv->varint_get32(&cp, endp, NULL);
    if (sub_size < 0 || endp - cp < sub_size)
        goto malformed;

    c->u.xpack.sub_codec = cram_decoder_init(hdr, encoding, cp, sub_size,
                                             option, version, vv);
    if (c->u.xpack.sub_codec == NULL)
        goto malformed;
    cp += sub_size;

    if (cp - data != size
        || c->u.xpack.nbits < 0 || c->u.xpack.nbits > 8 * sizeof(int64_t))
        goto malformed;

    return c;

 malformed:
    fprintf(stderr, "Malformed xpack header stream\n");
    cram_xpack_decode_free(c);
    return NULL;
}
/*
 * Packs the bytes buffered in c->out with hts_pack() and passes the
 * result to the sub-codec's encode callback, followed by the sub-codec's
 * flush callback if it has one.
 *
 * The packed buffer is freed on every path (it used to leak when the
 * sub-codec's encode failed), and a NULL result from hts_pack() is
 * treated as an error rather than being passed on.
 *
 * Returns the sub-codec's flush result, 0 if it has no flush callback, or
 *        -1 if packing or encoding fails.
 */
int cram_xpack_encode_flush(cram_codec *c) {
    int meta_len;
    uint64_t out_len;
    uint8_t out_meta[1024];
    int r = 0;

    uint8_t *out = hts_pack(BLOCK_DATA(c->out), BLOCK_SIZE(c->out),
                            out_meta, &meta_len, &out_len);
    if (!out)
        return -1;

    if (c->u.e_xpack.sub_codec->encode(NULL, c->u.e_xpack.sub_codec,
                                       (char *)out, out_len)) {
        free(out);
        return -1;
    }

    if (c->u.e_xpack.sub_codec->flush)
        r = c->u.e_xpack.sub_codec->flush(c->u.e_xpack.sub_codec);

    free(out);
    return r;
}
/*
 * Serialises the XPACK codec description into block b: the optional
 * prefix string, the codec id, the total parameter length, the bit width,
 * the number of mapped values, the map entries, and finally the
 * sub-codec's own serialised description.
 *
 * The sub-codec is serialised into a temporary block first so that its
 * length is known.  That block is released on every path (it used to
 * leak when appending it to b failed), and a negative result from the
 * sub-codec's store is treated as an error.
 *
 * Returns the number of bytes accounted for on success,
 *        -1 on failure.
 */
int cram_xpack_encode_store(cram_codec *c, cram_block *b,
                            char *prefix, int version) {
    int len = 0, r = 0, n;
    cram_block *tb = NULL;

    if (prefix) {
        size_t l = strlen(prefix);
        BLOCK_APPEND(b, prefix, l);
        len += l;
    }

    cram_codec *tc = c->u.e_xpack.sub_codec;
    tb = cram_new_block(0, 0);
    if (!tb)
        return -1;

    int len2 = tc->store(tc, tb, NULL, version);
    if (len2 < 0)
        goto block_err;

    len += (n = c->vv->varint_put32_blk(b, c->codec)); r |= n;

    /* Size of the map entries, needed for the parameter length. */
    int len1 = 0, i;
    for (i = 0; i < c->u.e_xpack.nval; i++)
        len1 += (n = c->vv->varint_size(c->u.e_xpack.rmap[i])), r |= n;

    len += (n = c->vv->varint_put32_blk(b, c->vv->varint_size(c->u.e_xpack.nbits)
                                        + c->vv->varint_size(c->u.e_xpack.nval)
                                        + len1 + len2)); r |= n;

    len += (n = c->vv->varint_put32_blk(b, c->u.e_xpack.nbits)); r |= n;
    len += (n = c->vv->varint_put32_blk(b, c->u.e_xpack.nval));  r |= n;
    for (i = 0; i < c->u.e_xpack.nval; i++)
        len += (n = c->vv->varint_put32_blk(b, c->u.e_xpack.rmap[i])), r |= n;

    BLOCK_APPEND(b, BLOCK_DATA(tb), BLOCK_SIZE(tb));

    cram_free_block(tb);

    return r > 0 ? len + len2 : -1;

 block_err:
    if (tb)
        cram_free_block(tb);
    return -1;
}
/*
 * Writes in_size int64_t symbols from in to c->out, each translated
 * through the codec's map table and stored in a field of the codec's bit
 * width.
 *
 * Returns the bitwise OR of the store_bits_MSB() results, which is 0 when
 * every store succeeded.
 */
int cram_xpack_encode_long(cram_slice *slice, cram_codec *c,
                           char *in, int in_size) {
    const int64_t *symbols = (int64_t *)in;
    int status = 0;
    int k = 0;

    while (k < in_size) {
        status |= store_bits_MSB(c->out, c->u.e_xpack.map[symbols[k]],
                                 c->u.e_xpack.nbits);
        k++;
    }

    return status;
}
/*
 * Writes in_size int symbols from in to c->out, each translated through
 * the codec's map table and stored in a field of the codec's bit width.
 *
 * Returns the bitwise OR of the store_bits_MSB() results, which is 0 when
 * every store succeeded.
 */
int cram_xpack_encode_int(cram_slice *slice, cram_codec *c,
                          char *in, int in_size) {
    const int *symbols = (int *)in;
    int status = 0;
    int k = 0;

    while (k < in_size) {
        status |= store_bits_MSB(c->out, c->u.e_xpack.map[symbols[k]],
                                 c->u.e_xpack.nbits);
        k++;
    }

    return status;
}
/*
 * Buffers in_size raw bytes from in in c->out.  This function does no
 * packing itself.
 *
 * Returns 0 on success, -1 if BLOCK_APPEND jumps to block_err.
 */
int cram_xpack_encode_char(cram_slice *slice, cram_codec *c,
                           char *in, int in_size) {
    int status = -1;

    BLOCK_APPEND(c->out, in, in_size);
    status = 0;

 block_err:
    return status;
}
/*
 * Releases an XPACK encoder: its sub-codec (if any), its output block,
 * and the codec itself.  A NULL codec is accepted and ignored.
 */
void cram_xpack_encode_free(cram_codec *c) {
    if (c) {
        if (c->u.e_xpack.sub_codec)
            c->u.e_xpack.sub_codec->free(c->u.e_xpack.sub_codec);
        cram_free_block(c->out);
        free(c);
    }
}
/*
 * Creates an XPACK encoder from the cram_xpack_encoder description passed
 * in dat: bit width, number of mapped values, forward map, and the
 * sub-encoding to chain to.
 *
 * The reverse map is rebuilt from the forward map (entries other than -1
 * are in use) and its size must match the declared number of values.
 *
 * Returns a newly allocated codec, or
 *         NULL on allocation failure, if the sub-codec cannot be created,
 *         or if the map size does not match.  The codec and its sub-codec
 *         are released on those paths; they used to leak on a mismatch.
 */
cram_codec *cram_xpack_encode_init(cram_stats *st,
                                   enum cram_encoding codec,
                                   enum cram_external_type option,
                                   void *dat,
                                   int version, varint_vec *vv) {
    cram_codec *c;
    int i, n;

    if (!(c = malloc(sizeof(*c))))
        return NULL;

    c->codec = E_XPACK;
    c->free = cram_xpack_encode_free;
    if (option == E_LONG)
        c->encode = cram_xpack_encode_long;
    else if (option == E_INT)
        c->encode = cram_xpack_encode_int;
    else
        c->encode = cram_xpack_encode_char;
    c->store = cram_xpack_encode_store;
    c->flush = cram_xpack_encode_flush;

    cram_xpack_encoder *e = (cram_xpack_encoder *)dat;
    c->u.e_xpack.nbits = e->nbits;
    c->u.e_xpack.nval = e->nval;
    c->u.e_xpack.sub_codec = cram_encoder_init(e->sub_encoding, NULL,
                                               E_BYTE_ARRAY, e->sub_codec_dat,
                                               version, vv);
    if (!c->u.e_xpack.sub_codec) {
        /* flush and store call the sub-codec unconditionally. */
        free(c);
        return NULL;
    }

    /* Copy the forward map and derive the reverse map from it. */
    memcpy(c->u.e_xpack.map, e->map, sizeof(e->map));
    for (i = n = 0; i < 256; i++)
        if (e->map[i] != -1)
            c->u.e_xpack.rmap[n++] = i;

    if (n != e->nval) {
        fprintf(stderr, "Incorrectly specified number of map items in PACK\n");
        /*
         * c->out has not been set at this point, so the parts are released
         * by hand rather than through c->free.
         */
        c->u.e_xpack.sub_codec->free(c->u.e_xpack.sub_codec);
        free(c);
        return NULL;
    }

    return c;
}
/*
 * Zig-zag encodes a signed 8-bit value: 0, -1, 1, -2, ... map to
 * 0, 1, 2, 3, ...  The arithmetic is done on unsigned values because
 * left-shifting a negative signed value is undefined behaviour.
 */
static uint8_t  zigzag8 (int8_t  x) {
    unsigned u = (uint8_t)x;
    return (uint8_t)((u << 1) ^ (0u - (u >> 7)));
}
/*
 * Zig-zag encodes a signed 16-bit value: 0, -1, 1, -2, ... map to
 * 0, 1, 2, 3, ...  The arithmetic is done on unsigned values because
 * left-shifting a negative signed value is undefined behaviour.
 */
static uint16_t zigzag16(int16_t x) {
    unsigned u = (uint16_t)x;
    return (uint16_t)((u << 1) ^ (0u - (u >> 15)));
}
/*
 * Zig-zag encodes a signed 32-bit value: 0, -1, 1, -2, ... map to
 * 0, 1, 2, 3, ...  The arithmetic is done on unsigned values because
 * left-shifting a negative signed value is undefined behaviour.
 */
static uint32_t zigzag32(int32_t x) {
    uint32_t u = (uint32_t)x;
    return (uint32_t)((u << 1) ^ ((uint32_t)0 - (u >> 31)));
}
/*
 * Reverses the 16-bit zig-zag encoding: the low bit selects the sign and
 * the remaining bits hold the magnitude.
 */
static int16_t unzigzag16(uint16_t x) {
    int magnitude = x >> 1;
    int sign_mask = -(x & 1);   /* 0 for even input, all ones for odd */

    return magnitude ^ sign_mask;
}
/*
 * Reverses the 32-bit zig-zag encoding: the low bit selects the sign and
 * the remaining bits hold the magnitude.
 */
static int32_t unzigzag32(uint32_t x) {
    uint32_t magnitude = x >> 1;
    uint32_t sign_mask = -(x & 1);   /* 0 for even input, all ones for odd */

    return magnitude ^ sign_mask;
}
/* 64-bit XDELTA decoding: this function does nothing and always returns -1. */
int cram_xdelta_decode_long(cram_slice *slice, cram_codec *c, cram_block *in, char *out, int *out_size) {
    (void)slice;
    (void)c;
    (void)in;
    (void)out;
    (void)out_size;
    return -1;
}
/*
 * Decodes *out_size 32-bit values into out.  Each value is obtained by
 * decoding one item from the sub-codec, un-zig-zagging it, and adding it
 * to the running total kept in c->u.xdelta.last.
 *
 * Returns 0 on success,
 *        -1 if the sub-codec's decode returns a negative value.
 */
int cram_xdelta_decode_int(cram_slice *slice, cram_codec *c, cram_block *in, char *out, int *out_size) {
    uint32_t *dst = (uint32_t *)out;
    int k = 0;

    while (k < *out_size) {
        uint32_t raw;
        int one = 1;

        if (c->u.e_xdelta.sub_codec->decode(slice, c->u.e_xdelta.sub_codec,
                                            in, (char *)&raw, &one) < 0)
            return -1;

        uint32_t delta = unzigzag32(raw);
        c->u.xdelta.last = dst[k] = delta + c->u.xdelta.last;
        k++;
    }

    return 0;
}
/* XDELTA block expansion: this function does nothing and always returns -1. */
static int cram_xdelta_decode_expand_char(cram_slice *slice, cram_codec *c) {
    (void)slice;
    (void)c;
    return -1;
}
/* Byte-wise XDELTA decoding: this function does nothing and always returns -1. */
int cram_xdelta_decode_char(cram_slice *slice, cram_codec *c, cram_block *in, char *out, int *out_size) {
    (void)slice;
    (void)c;
    (void)in;
    (void)out;
    (void)out_size;
    return -1;
}
/*
 * Returns an int16_t whose in-memory bytes are those written by
 * i16_to_le() for the value i.
 */
static inline int16_t le_int2(int16_t i) {
    int16_t encoded;
    uint8_t *bytes = (uint8_t *)&encoded;

    i16_to_le(i, bytes);
    return encoded;
}
/*
 * Decodes *out_size bytes of delta-encoded words and appends them to the
 * cram_block passed (cast to char *) as out_.
 *
 * Varints are read from the sub-codec's block, un-zig-zagged and summed
 * into a running total that restarts at zero on every call.  Only a word
 * size of 2 is supported.  When *out_size is not a multiple of the word
 * size, the first word emitted is shortened by the padding amount.
 *
 * The sub-codec's block is checked for NULL, and a non-positive word size
 * is rejected before it is used as a modulus.
 *
 * Returns 0 on success,
 *        -1 on a missing block, an unsupported word size, a varint decode
 *           error, or an append failure.
 */
int cram_xdelta_decode_block(cram_slice *slice, cram_codec *c, cram_block *in,
                             char *out_, int *out_size) {
    cram_block *out = (cram_block *)out_;
    cram_block *b = c->u.e_xdelta.sub_codec->get_block(slice, c->u.e_xdelta.sub_codec);
    int i = 0;
    const int w = c->u.xdelta.word_size;

    if (!b)
        return -1;

    if (w <= 0) {
        fprintf(stderr, "Unsupported word size by XDELTA\n");
        return -1;
    }

    uint32_t npad = (w - *out_size%w)%w;
    uint32_t out_sz = *out_size + npad;
    c->u.xdelta.last = 0;  /* the running total restarts for each array */

    for (i = 0; i < out_sz; i += w) {
        uint16_t v;
        char *cp = (char *)b->data + b->byte;
        char *cp_end = (char *)b->data + b->uncomp_size;
        int err = 0;

        v = c->vv->varint_get32(&cp, cp_end, &err);
        if (err)
            return -1;
        b->byte = cp - (char *)b->data;

        switch(w) {
        case 2: {
            int16_t d = unzigzag16(v), z;
            c->u.xdelta.last = d + c->u.xdelta.last;
            z = le_int2(c->u.xdelta.last);
            /* Only the first word is shortened; later words are whole. */
            BLOCK_APPEND(out, &z, 2-npad);
            npad = 0;
            break;
        }
        default:
            fprintf(stderr, "Unsupported word size by XDELTA\n");
            return -1;
        }
    }

    return 0;

 block_err:
    return -1;
}
/*
 * Releases an XDELTA decoder together with its sub-codec, if it has one.
 * A NULL codec is accepted and ignored.
 */
void cram_xdelta_decode_free(cram_codec *c) {
    if (c) {
        if (c->u.xdelta.sub_codec)
            c->u.xdelta.sub_codec->free(c->u.xdelta.sub_codec);
        free(c);
    }
}
/*
 * Returns the size of the block in slice->block_by_id[512 + codec_id]
 * after attempting expansion, or
 *        -1 if that slot is empty.  The expansion helper in this file
 *           always fails, so an empty slot used to cause a NULL
 *           dereference here.
 */
int cram_xdelta_decode_size(cram_slice *slice, cram_codec *c) {
    cram_xdelta_decode_expand_char(slice, c);

    cram_block *b = slice->block_by_id[512 + c->codec_id];
    if (!b)
        return -1;

    return b->uncomp_size;
}
/*
 * Attempts expansion and then returns whatever
 * slice->block_by_id[512 + codec_id] holds.
 */
cram_block *cram_xdelta_get_block(cram_slice *slice, cram_codec *c) {
    cram_block *expanded;

    cram_xdelta_decode_expand_char(slice, c);
    expanded = slice->block_by_id[512 + c->codec_id];

    return expanded;
}
/*
 * Builds an XDELTA decoder from its serialised parameters
 * (data[0..size-1]): the word size, then the sub-codec's encoding id,
 * parameter size and parameters.  Together they must use exactly size
 * bytes.
 *
 * option selects the decoder.  E_BYTE_ARRAY_BLOCK selects the block
 * decoder and is passed to the sub-codec as E_BYTE_ARRAY.  Any other
 * unrecognised option is rejected without a message.
 *
 * Returns a newly allocated codec, or
 *         NULL on allocation failure, an unsupported option, or malformed
 *         parameters (a message goes to stderr for the last).
 */
cram_codec *cram_xdelta_decode_init(cram_block_compression_hdr *hdr,
                                    char *data, int size,
                                    enum cram_encoding codec,
                                    enum cram_external_type option,
                                    int version, varint_vec *vv) {
    char *cp = data;
    char *endp = data+size;
    int encoding, sub_size;
    cram_codec *c = calloc(1, sizeof(*c));

    if (c == NULL)
        return NULL;

    c->codec = E_XDELTA;

    if (option == E_LONG) {
        c->decode = cram_xdelta_decode_long;
    } else if (option == E_INT) {
        c->decode = cram_xdelta_decode_int;
    } else if (option == E_BYTE_ARRAY || option == E_BYTE) {
        c->decode = cram_xdelta_decode_char;
    } else if (option == E_BYTE_ARRAY_BLOCK) {
        /* The sub-codec is initialised as a plain byte array. */
        option = E_BYTE_ARRAY;
        c->decode = cram_xdelta_decode_block;
    } else {
        free(c);
        return NULL;
    }

    c->free      = cram_xdelta_decode_free;
    c->size      = cram_xdelta_decode_size;
    c->get_block = cram_xdelta_get_block;
    c->describe  = NULL;

    c->u.xdelta.word_size = vv->varint_get32(&cp, endp, NULL);
    c->u.xdelta.last = 0;

    encoding = vv->varint_get32(&cp, endp, NULL);
    sub_size = vv->varint_get32(&cp, endp, NULL);
    if (sub_size < 0 || endp - cp < sub_size)
        goto malformed;

    c->u.xdelta.sub_codec = cram_decoder_init(hdr, encoding, cp, sub_size,
                                              option, version, vv);
    if (c->u.xdelta.sub_codec == NULL)
        goto malformed;
    cp += sub_size;

    if (cp - data != size)
        goto malformed;

    return c;

 malformed:
    fprintf(stderr, "Malformed xdelta header stream\n");
    cram_xdelta_decode_free(c);
    return NULL;
}
/*
 * Delta + zig-zag transforms everything buffered in c->out and hands the
 * resulting varint stream to the sub-codec's encode function.
 *
 * Word sizes 1, 2 and 4 are handled.  For word size 2 with an odd byte
 * count the leading single byte is emitted first and becomes the initial
 * "previous" value.
 *
 * Returns 0 on success, -1 on failure (allocation, unsupported word size,
 * or sub-codec error).
 */
int cram_xdelta_encode_flush(cram_codec *c) {
    int status = -1;
    cram_block *deltas = cram_new_block(0, 0);

    if (!deltas)
        return -1;

    switch (c->u.e_xdelta.word_size) {
    case 1: {
        int i, n = BLOCK_SIZE(c->out);
        uint8_t *dat = (uint8_t *)BLOCK_DATA(c->out), last = 0;

        for (i = 0; i < n; i++) {
            uint32_t d = dat[i] - last;
            last = dat[i];
            c->vv->varint_put32_blk(deltas, zigzag8(d));
        }
        break;
    }

    case 2: {
        int i, n = BLOCK_SIZE(c->out)/2;
        uint16_t *dat = (uint16_t *)BLOCK_DATA(c->out), last = 0;

        if (n*2 < BLOCK_SIZE(c->out)) {
            /* Odd length: the first byte stands alone. */
            last = *(uint8_t *)dat;
            c->vv->varint_put32_blk(deltas, zigzag16(last));
            dat = (uint16_t *)(((uint8_t *)dat)+1);
        }

        for (i = 0; i < n; i++) {
            uint16_t d = dat[i] - last;
            last = dat[i];
            c->vv->varint_put32_blk(deltas, zigzag16(d));
        }
        break;
    }

    case 4: {
        int i, n = BLOCK_SIZE(c->out)/4;
        uint32_t *dat = (uint32_t *)BLOCK_DATA(c->out), last = 0;

        for (i = 0; i < n; i++) {
            uint32_t d = dat[i] - last;
            last = dat[i];
            c->vv->varint_put32_blk(deltas, zigzag32(d));
        }
        break;
    }

    default:
        goto err;
    }

    if (c->u.e_xdelta.sub_codec->encode(NULL, c->u.e_xdelta.sub_codec,
                                        (char *)deltas->data, deltas->byte))
        goto err;

    status = 0;

 err:
    cram_free_block(deltas);
    return status;
}
/*
 * Serialises the XDELTA encoding description into block b.
 *
 * Layout written: optional prefix, codec id, parameter length, word size,
 * then the sub-codec's own serialised description.
 *
 * Returns the number of bytes accounted for, or -1 on failure.  The
 * temporary block used to size the sub-codec description is released on
 * every path once allocated, including when an append to b fails.
 */
int cram_xdelta_encode_store(cram_codec *c, cram_block *b,
                             char *prefix, int version) {
    int len = 0, len2, r = 0, n, ret = -1;
    cram_codec *tc;
    cram_block *tb = NULL;

    if (prefix) {
        size_t l = strlen(prefix);
        BLOCK_APPEND(b, prefix, l);
        len += l;
    }

    /* Store the sub-codec first so its length is known. */
    tc = c->u.e_xdelta.sub_codec;
    tb = cram_new_block(0, 0);
    if (!tb)
        return -1;
    len2 = tc->store(tc, tb, NULL, version);

    len += (n = c->vv->varint_put32_blk(b, c->codec)); r |= n;

    /* Parameter length: our word size plus the sub-codec description. */
    len += (n = c->vv->varint_put32_blk(b, c->vv->varint_size(c->u.e_xdelta.word_size)
                                        + len2)); r |= n;

    len += (n = c->vv->varint_put32_blk(b, c->u.e_xdelta.word_size)); r |= n;
    BLOCK_APPEND(b, BLOCK_DATA(tb), BLOCK_SIZE(tb));

    if (r > 0)
        ret = len + len2;

 block_err:
    /* cram_free_block() is also called with a possibly-NULL block by the
     * encoder free functions in this file. */
    cram_free_block(tb);
    return ret;
}
/*
 * XDELTA encode entry point for E_LONG series.
 *
 * Not implemented: always returns -1.
 */
int cram_xdelta_encode_long(cram_slice *slice, cram_codec *c,
                            char *in, int in_size) {
    return -1;
}
/*
 * XDELTA encode entry point for E_INT series.
 *
 * Not implemented: always returns -1.
 */
int cram_xdelta_encode_int(cram_slice *slice, cram_codec *c,
                           char *in, int in_size) {
    return -1;
}
/*
 * Delta-encodes one byte array and passes the result to the sub-codec.
 *
 * The running "last" value is reset for each call.  Only a word size of 2
 * produces output: with an odd in_size the first byte is emitted on its
 * own, then each following 16-bit word is emitted as a zig-zag varint of
 * its difference from the previous word.  For other word sizes an empty
 * buffer is forwarded.
 *
 * Returns 0 on success, -1 on allocation failure or sub-codec error.
 */
int cram_xdelta_encode_char(cram_slice *slice, cram_codec *c,
                            char *in, int in_size) {
    char *buf = malloc(in_size*5);
    if (buf == NULL)
        return -1;

    char *wp = buf;
    char *buf_end = buf + in_size*5;

    c->u.e_xdelta.last = 0;

    if (c->u.e_xdelta.word_size == 2) {
        int w;
        int odd = in_size%2;
        int nwords = in_size/2;

        if (odd) {
            uint16_t z = in[0];
            c->u.e_xdelta.last = le_int2(z);
            wp += c->vv->varint_put32(wp, buf_end,
                                      zigzag16(c->u.e_xdelta.last));
        }

        uint16_t *in16 = (uint16_t *)(in+odd);
        for (w = 0; w < nwords; w++) {
            uint16_t d = le_int2(in16[w]) - c->u.e_xdelta.last;
            c->u.e_xdelta.last = le_int2(in16[w]);
            wp += c->vv->varint_put32(wp, buf_end, zigzag16(d));
        }
    }

    int failed = c->u.e_xdelta.sub_codec->encode(slice,
                                                 c->u.e_xdelta.sub_codec,
                                                 (char *)buf, wp-buf) != 0;
    free(buf);

    return failed ? -1 : 0;
}
/*
 * Releases an XDELTA encoder: its sub-codec (if any), its output block,
 * and the codec itself.  A NULL codec is ignored.
 */
void cram_xdelta_encode_free(cram_codec *c) {
    if (c == NULL)
        return;

    cram_codec *sub = c->u.e_xdelta.sub_codec;
    if (sub != NULL)
        sub->free(sub);

    cram_free_block(c->out);
    free(c);
}
/*
 * Creates an XDELTA encoder.
 *
 * dat points to a cram_xdelta_encoder supplying the word size and the
 * sub-encoding (id plus its own parameter blob).  The sub-codec is always
 * created with type E_BYTE_ARRAY.
 *
 * Returns the new codec, or NULL if allocation or sub-codec creation
 * fails.  Failing here avoids handing back a codec whose flush and store
 * functions would dereference a NULL sub-codec.
 */
cram_codec *cram_xdelta_encode_init(cram_stats *st,
                                    enum cram_encoding codec,
                                    enum cram_external_type option,
                                    void *dat,
                                    int version, varint_vec *vv) {
    cram_codec *c;

    if (!(c = malloc(sizeof(*c))))
        return NULL;

    c->codec = E_XDELTA;
    c->free = cram_xdelta_encode_free;
    if (option == E_LONG)
        c->encode = cram_xdelta_encode_long;
    else if (option == E_INT)
        c->encode = cram_xdelta_encode_int;
    else
        c->encode = cram_xdelta_encode_char;
    c->store = cram_xdelta_encode_store;
    c->flush = cram_xdelta_encode_flush;

    cram_xdelta_encoder *e = (cram_xdelta_encoder *)dat;
    c->u.e_xdelta.word_size = e->word_size;
    c->u.e_xdelta.last = 0;
    c->u.e_xdelta.sub_codec = cram_encoder_init(e->sub_encoding, NULL,
                                                E_BYTE_ARRAY,
                                                e->sub_codec_dat,
                                                version, vv);
    if (!c->u.e_xdelta.sub_codec) {
        /* c->out has not been set yet, so free c directly rather than
         * going through cram_xdelta_encode_free(). */
        free(c);
        return NULL;
    }

    return c;
}
/*
 * XRLE decode entry point for E_LONG series.
 *
 * Not implemented: always returns -1.
 */
int cram_xrle_decode_long(cram_slice *slice, cram_codec *c,
                          cram_block *in, char *out, int *out_size) {
    return -1;
}
/*
 * XRLE decode entry point for E_INT series.
 *
 * Not implemented: always returns -1.
 */
int cram_xrle_decode_int(cram_slice *slice, cram_codec *c,
                         cram_block *in, char *out, int *out_size) {
    return -1;
}
/*
 * Expands the run-length encoded data for this codec into a block cached
 * in the slice at slot 512 + codec_id.
 *
 * If the slot is already populated nothing is done.  Otherwise a new block
 * is registered in the slot, the literal and length streams are fetched
 * from the two sub-codecs, the expected output size is read from the start
 * of the length stream, and hts_rle_decode() fills the new block.
 *
 * Returns 0 on success (or when already cached), -1 on failure.
 */
static int cram_xrle_decode_expand_char(cram_slice *slice, cram_codec *c) {
    cram_block *cache = slice->block_by_id[512 + c->codec_id];

    if (cache)
        return 0;

    cache = slice->block_by_id[512 + c->codec_id] = cram_new_block(0, 0);
    if (!cache)
        return -1;

    /* Literal stream. */
    cram_block *lits = c->u.xrle.lit_codec->get_block(slice,
                                                      c->u.xrle.lit_codec);
    if (!lits)
        return -1;
    unsigned char *lit_ptr = lits->data;
    unsigned int lit_bytes = lits->uncomp_size;

    /* Run-length stream; the size is queried before the block is fetched. */
    unsigned int len_bytes = c->u.xrle.len_codec->size(slice,
                                                       c->u.xrle.len_codec);
    cram_block *lens = c->u.xrle.len_codec->get_block(slice,
                                                      c->u.xrle.len_codec);
    if (!lens)
        return -1;
    unsigned char *len_ptr = lens->data;

    /* Collect the symbols flagged as run-length encoded. */
    uint8_t syms[256];
    int nsyms = 0;
    int s = 0;
    while (s < 256) {
        if (c->u.xrle.rep_score[s] > 0)
            syms[nsyms++] = s;
        s++;
    }

    /* The length stream starts with the expanded size. */
    uint64_t expanded;
    int hdr_bytes = var_get_u64(len_ptr, len_ptr+len_bytes, &expanded);

    cache->data = malloc(expanded);
    if (!cache->data)
        return -1;

    hts_rle_decode(lit_ptr, lit_bytes,
                   len_ptr+hdr_bytes, len_bytes-hdr_bytes,
                   syms, nsyms,
                   cache->data, &expanded);
    cache->uncomp_size = expanded;

    return 0;
}
/*
 * Reports the uncompressed size of the expanded XRLE block for this codec.
 *
 * Returns the size, or -1 when no cached block exists (for example when
 * the expansion helper could not allocate one), rather than dereferencing
 * a NULL slot.
 */
int cram_xrle_decode_size(cram_slice *slice, cram_codec *c) {
    cram_xrle_decode_expand_char(slice, c);

    cram_block *b = slice->block_by_id[512 + c->codec_id];
    return b ? b->uncomp_size : -1;
}
/*
 * Returns the expanded XRLE block for this codec (slot 512 + codec_id in
 * the slice), expanding it first if necessary.  The result is whatever the
 * slot holds afterwards, which may be NULL.
 */
cram_block *cram_xrle_get_block(cram_slice *slice, cram_codec *c) {
    cram_block *cached;

    cram_xrle_decode_expand_char(slice, c);
    cached = slice->block_by_id[512 + c->codec_id];

    return cached;
}
/*
 * Copies the next *out_size bytes of expanded XRLE data into out.
 *
 * The data is expanded once into a per-slice cached block; successive
 * calls consume it sequentially via the block's idx cursor.
 *
 * Returns 0 on success, -1 if expansion failed, no cached block exists,
 * or the request would read past the end of the expanded data.
 */
int cram_xrle_decode_char(cram_slice *slice, cram_codec *c, cram_block *in, char *out, int *out_size) {
    int n = *out_size;

    if (cram_xrle_decode_expand_char(slice, c) < 0)
        return -1;

    cram_block *b = slice->block_by_id[512 + c->codec_id];

    /* Never copy beyond what the expansion actually produced. */
    if (!b || n < 0 || (int64_t)b->idx + n > (int64_t)b->uncomp_size)
        return -1;

    memcpy(out, b->data + b->idx, n);
    b->idx += n;

    return 0;
}
/*
 * Releases an XRLE decoder and whichever of its length / literal
 * sub-codecs exist.  A NULL codec is ignored.
 */
void cram_xrle_decode_free(cram_codec *c) {
    if (c == NULL)
        return;

    cram_codec *lens = c->u.xrle.len_codec;
    if (lens != NULL)
        lens->free(lens);

    cram_codec *lits = c->u.xrle.lit_codec;
    if (lits != NULL)
        lits->free(lits);

    free(c);
}
/*
 * Builds an XRLE decoder from its serialised parameters.
 *
 * The header is read as: number of run-length symbols, that many symbol
 * values, the length sub-encoding (id, parameter length, parameters), then
 * the literal sub-encoding in the same form.  The length sub-codec is
 * created as E_INT; the literal sub-codec uses the caller's option.
 *
 * Returns the new codec, or NULL on allocation failure, unsupported
 * option, or malformed header.
 */
cram_codec *cram_xrle_decode_init(cram_block_compression_hdr *hdr,
                                  char *data, int size,
                                  enum cram_encoding codec,
                                  enum cram_external_type option,
                                  int version, varint_vec *vv) {
    char *cp = data;
    char *endp = data+size;
    int err = 0;
    cram_codec *c = calloc(1, sizeof(*c));

    if (!c)
        return NULL;

    c->codec = E_XRLE;

    switch (option) {
    case E_LONG:
        c->decode = cram_xrle_decode_long;
        break;

    case E_INT:
        c->decode = cram_xrle_decode_int;
        break;

    case E_BYTE_ARRAY:
    case E_BYTE:
        c->decode = cram_xrle_decode_char;
        break;

    default:
        fprintf(stderr, "BYTE_ARRAYs not supported by this codec\n");
        free(c);
        return NULL;
    }

    c->free = cram_xrle_decode_free;
    c->size = cram_xrle_decode_size;
    c->get_block = cram_xrle_get_block;
    c->describe = NULL;
    c->u.xrle.cur_len = 0;
    c->u.xrle.cur_lit = -1;

    /* Map of which symbols carry run lengths. */
    int idx, sym;
    int nrle = vv->varint_get32(&cp, endp, &err);
    memset(c->u.xrle.rep_score, 0, 256*sizeof(*c->u.xrle.rep_score));
    for (idx = 0; idx < nrle && idx < 256; idx++) {
        sym = vv->varint_get32(&cp, endp, &err);
        if (sym >= 0 && sym < 256)
            c->u.xrle.rep_score[sym] = 1;
    }

    /* Length sub-encoding. */
    c->u.xrle.len_encoding = vv->varint_get32(&cp, endp, &err);
    int sub_size = vv->varint_get32(&cp, endp, &err);
    if (sub_size < 0 || endp - cp < sub_size)
        goto malformed;
    c->u.xrle.len_codec = cram_decoder_init(hdr, c->u.xrle.len_encoding,
                                            cp, sub_size, E_INT, version, vv);
    if (!c->u.xrle.len_codec)
        goto malformed;
    cp += sub_size;

    /* Literal sub-encoding. */
    c->u.xrle.lit_encoding = vv->varint_get32(&cp, endp, &err);
    sub_size = vv->varint_get32(&cp, endp, &err);
    if (sub_size < 0 || endp - cp < sub_size)
        goto malformed;
    c->u.xrle.lit_codec = cram_decoder_init(hdr, c->u.xrle.lit_encoding,
                                            cp, sub_size, option, version, vv);
    if (!c->u.xrle.lit_codec)
        goto malformed;
    cp += sub_size;

    /* Any varint read error along the way invalidates the header. */
    if (err)
        goto malformed;

    return c;

 malformed:
    fprintf(stderr, "Malformed xrle header stream\n");
    cram_xrle_decode_free(c);
    return NULL;
}
/*
 * Run-length encodes the pending data and feeds the two resulting streams
 * (run lengths and literals) to the corresponding sub-codecs.
 *
 * The data comes from the cached to_flush pointer when set, otherwise from
 * c->out.  The length stream is prefixed with the total input size.
 *
 * Returns 0 on success, -1 on failure.  Both temporary buffers are
 * released on every path.
 */
int cram_xrle_encode_flush(cram_codec *c) {
    uint8_t *out_lit = NULL, *out_len = NULL;
    uint64_t out_lit_size, out_len_size;
    uint8_t rle_syms[256];
    int rle_nsyms = 0, i, ret = -1;

    for (i = 0; i < 256; i++)
        if (c->u.e_xrle.rep_score[i] > 0)
            rle_syms[rle_nsyms++] = i;

    if (!c->u.e_xrle.to_flush) {
        c->u.e_xrle.to_flush = (char *)BLOCK_DATA(c->out);
        c->u.e_xrle.to_flush_size = BLOCK_SIZE(c->out);
    }

    out_len = malloc(c->u.e_xrle.to_flush_size+8);
    if (!out_len)
        return -1;

    int nb = var_put_u64(out_len, NULL, c->u.e_xrle.to_flush_size);

    out_lit = hts_rle_encode((uint8_t *)c->u.e_xrle.to_flush, c->u.e_xrle.to_flush_size,
                             out_len+nb, &out_len_size,
                             rle_syms, &rle_nsyms,
                             NULL, &out_lit_size);
    if (!out_lit)
        goto cleanup; /* the size outputs are not meaningful on failure */
    out_len_size += nb;

    if (c->u.e_xrle.len_codec->encode(NULL,
                                      c->u.e_xrle.len_codec,
                                      (char *)out_len, out_len_size))
        goto cleanup;

    if (c->u.e_xrle.lit_codec->encode(NULL,
                                      c->u.e_xrle.lit_codec,
                                      (char *)out_lit, out_lit_size))
        goto cleanup;

    ret = 0;

 cleanup:
    free(out_len);
    free(out_lit);

    return ret;
}
/*
 * Serialises the XRLE encoding description into block b.
 *
 * Layout written: optional prefix, codec id, parameter length, number of
 * run-length symbols, the symbols themselves, the length sub-codec
 * description, then the literal sub-codec description.
 *
 * Returns the number of bytes accounted for, or -1 on failure.  The three
 * temporary blocks are released on every path.
 */
int cram_xrle_encode_store(cram_codec *c, cram_block *b,
                           char *prefix, int version) {
    int len = 0, r = 0, n, ret = -1;
    int i, nrle = 0, len1 = 0, len2, len3;
    cram_codec *tc;
    cram_block *b_rle = NULL, *b_len = NULL, *b_lit = NULL;

    if (prefix) {
        size_t l = strlen(prefix);
        BLOCK_APPEND(b, prefix, l);
        len += l;
    }

    /* List of symbols that carry run lengths. */
    b_rle = cram_new_block(0, 0);
    if (!b_rle)
        goto block_err;

    for (i = 0; i < 256; i++) {
        if (c->u.e_xrle.rep_score[i] > 0) {
            nrle++;
            len1 += (n = c->vv->varint_put32_blk(b_rle,i)); r |= n;
        }
    }

    /* Store both sub-codecs into scratch blocks to learn their lengths. */
    tc = c->u.e_xrle.len_codec;
    b_len = cram_new_block(0, 0);
    if (!b_len)
        goto block_err;
    len2 = tc->store(tc, b_len, NULL, version);

    tc = c->u.e_xrle.lit_codec;
    b_lit = cram_new_block(0, 0);
    if (!b_lit)
        goto block_err;
    len3 = tc->store(tc, b_lit, NULL, version);

    len += (n = c->vv->varint_put32_blk(b, c->codec)); r |= n;
    len += (n = c->vv->varint_put32_blk(b, len1 + len2 + len3
                                        + c->vv->varint_size(nrle))); r |= n;
    len += (n = c->vv->varint_put32_blk(b, nrle)); r |= n;
    BLOCK_APPEND(b, BLOCK_DATA(b_rle), BLOCK_SIZE(b_rle));
    BLOCK_APPEND(b, BLOCK_DATA(b_len), BLOCK_SIZE(b_len));
    BLOCK_APPEND(b, BLOCK_DATA(b_lit), BLOCK_SIZE(b_lit));

    if (r > 0)
        ret = len + len1 + len2 + len3;

 block_err:
    /* cram_free_block() is also called with a possibly-NULL block by the
     * encoder free functions in this file. */
    cram_free_block(b_rle);
    cram_free_block(b_len);
    cram_free_block(b_lit);

    return ret;
}
/*
 * XRLE encode entry point for E_LONG series.
 *
 * Not implemented: always returns -1.
 */
int cram_xrle_encode_long(cram_slice *slice, cram_codec *c,
                          char *in, int in_size) {
    return -1;
}
/*
 * XRLE encode entry point for E_INT series.
 *
 * Not implemented: always returns -1.
 */
int cram_xrle_encode_int(cram_slice *slice, cram_codec *c,
                         char *in, int in_size) {
    return -1;
}
/*
 * Queues input bytes for later run-length encoding at flush time.
 *
 * The first buffer seen is only remembered by pointer (to_flush), with no
 * copy made.  If another buffer arrives, the remembered one is first
 * copied into c->out (allocated on demand) and from then on, while c->out
 * holds data, input is appended to c->out.
 *
 * Returns 0 on success, -1 on allocation/append failure.
 */
int cram_xrle_encode_char(cram_slice *slice, cram_codec *c,
                          char *in, int in_size) {
    /* A previous buffer was cached by pointer: move it into c->out. */
    if (c->u.e_xrle.to_flush) {
        if (!c->out) {
            c->out = cram_new_block(0, 0);
            if (!c->out)
                return -1;
        }

        BLOCK_APPEND(c->out, c->u.e_xrle.to_flush, c->u.e_xrle.to_flush_size);
        c->u.e_xrle.to_flush = NULL;
        c->u.e_xrle.to_flush_size = 0;
    }

    /* Nothing gathered so far: just remember where the caller's data is. */
    if (!c->out || !(BLOCK_SIZE(c->out) > 0)) {
        c->u.e_xrle.to_flush = in;
        c->u.e_xrle.to_flush_size = in_size;
        return 0;
    }

    /* Already gathering: append. */
    BLOCK_APPEND(c->out, in, in_size);
    return 0;

 block_err:
    return -1;
}
/*
 * Releases an XRLE encoder: whichever sub-codecs exist, its output block,
 * and the codec itself.  A NULL codec is ignored.
 */
void cram_xrle_encode_free(cram_codec *c) {
    if (c == NULL)
        return;

    cram_codec *lens = c->u.e_xrle.len_codec;
    if (lens != NULL)
        lens->free(lens);

    cram_codec *lits = c->u.e_xrle.lit_codec;
    if (lits != NULL)
        lits->free(lits);

    cram_free_block(c->out);
    free(c);
}
/*
 * Creates an XRLE encoder.
 *
 * dat points to a cram_xrle_encoder supplying the length and literal
 * sub-encodings (id plus parameter blob each) and the 256-entry rep_score
 * table saying which symbols are run-length encoded.  Both sub-codecs are
 * created with type E_BYTE.
 *
 * Returns the new codec, or NULL if allocation or either sub-codec
 * creation fails.  Failing here avoids handing back a codec whose flush
 * and store functions would dereference a NULL sub-codec.
 */
cram_codec *cram_xrle_encode_init(cram_stats *st,
                                  enum cram_encoding codec,
                                  enum cram_external_type option,
                                  void *dat,
                                  int version, varint_vec *vv) {
    cram_codec *c;

    if (!(c = malloc(sizeof(*c))))
        return NULL;

    c->codec = E_XRLE;
    c->free = cram_xrle_encode_free;
    if (option == E_LONG)
        c->encode = cram_xrle_encode_long;
    else if (option == E_INT)
        c->encode = cram_xrle_encode_int;
    else
        c->encode = cram_xrle_encode_char;
    c->store = cram_xrle_encode_store;
    c->flush = cram_xrle_encode_flush;

    cram_xrle_encoder *e = (cram_xrle_encoder *)dat;

    c->u.e_xrle.len_codec = cram_encoder_init(e->len_encoding, NULL,
                                              E_BYTE, e->len_dat,
                                              version, vv);
    c->u.e_xrle.lit_codec = cram_encoder_init(e->lit_encoding, NULL,
                                              E_BYTE, e->lit_dat,
                                              version, vv);
    if (!c->u.e_xrle.len_codec || !c->u.e_xrle.lit_codec) {
        /* c->out has not been set yet, so tear down by hand rather than
         * going through cram_xrle_encode_free(). */
        if (c->u.e_xrle.len_codec)
            c->u.e_xrle.len_codec->free(c->u.e_xrle.len_codec);
        if (c->u.e_xrle.lit_codec)
            c->u.e_xrle.lit_codec->free(c->u.e_xrle.lit_codec);
        free(c);
        return NULL;
    }

    c->u.e_xrle.cur_lit = -1;
    c->u.e_xrle.cur_len = -1;
    c->u.e_xrle.to_flush = NULL;
    c->u.e_xrle.to_flush_size = 0;

    memcpy(c->u.e_xrle.rep_score, e->rep_score, 256*sizeof(*c->u.e_xrle.rep_score));

    return c;
}
/*
 * Decodes *out_size sub-exponential coded integers from the bit stream
 * into out (treated as an int32_t array).
 *
 * Each value starts with a unary count u of 1 bits:
 *   u == 0: the value is the next k bits;
 *   u  > 0: the value is the next u+k-1 bits plus 2^(u+k-1).
 * The codec's offset is subtracted before storing.
 *
 * Returns 0 on success, -1 if the input runs out.
 */
int cram_subexp_decode(cram_slice *slice, cram_codec *c, cram_block *in, char *out, int *out_size) {
    int32_t *dest = (int32_t *)out;
    const int k = c->u.subexp.k;
    const int total = *out_size;
    int pos;

    for (pos = 0; pos < total; pos++) {
        int ones = get_one_bits_MSB(in);
        if (ones < 0)
            return -1;

        /* Number of payload bits following the unary prefix. */
        int nbits = ones > 0 ? ones + k - 1 : k;
        if (cram_not_enough_bits(in, nbits))
            return -1;

        int val = 0;
        int remaining;
        for (remaining = nbits; remaining; remaining--)
            GET_BIT_MSB(in, val);

        if (ones)
            val += 1 << (ones + k-1);

        dest[pos] = val - c->u.subexp.offset;
    }

    return 0;
}
/*
 * Releases a SUBEXP decoder.  A NULL codec is harmless because free()
 * ignores NULL.
 */
void cram_subexp_decode_free(cram_codec *c) {
    free(c);
}
/*
 * Appends a textual description of the SUBEXP parameters to ks.
 *
 * Returns 0 on success, -1 if the formatted append fails.
 */
int cram_subexp_describe(cram_codec *c, kstring_t *ks) {
    int rc = ksprintf(ks, "SUBEXP(offset=%d,k=%d)",
                      c->u.subexp.offset,
                      c->u.subexp.k);

    if (rc < 0)
        return -1;

    return 0;
}
/*
 * Builds a SUBEXP decoder from its serialised parameters: an offset
 * followed by k, which together must occupy exactly 'size' bytes.
 *
 * Only E_INT series are supported.
 *
 * Returns the new codec, or NULL on unsupported option, allocation
 * failure, or malformed header (wrong length or negative k).
 */
cram_codec *cram_subexp_decode_init(cram_block_compression_hdr *hdr,
                                    char *data, int size,
                                    enum cram_encoding codec,
                                    enum cram_external_type option,
                                    int version, varint_vec *vv) {
    char *cp = data;
    cram_codec *c;

    if (option != E_INT) {
        hts_log_error("This codec only supports INT encodings");
        return NULL;
    }

    c = malloc(sizeof(*c));
    if (c == NULL)
        return NULL;

    c->codec = E_SUBEXP;
    c->decode = cram_subexp_decode;
    c->free = cram_subexp_decode_free;
    c->describe = cram_subexp_describe;
    c->u.subexp.k = -1;

    c->u.subexp.offset = vv->varint_get32(&cp, data + size, NULL);
    c->u.subexp.k = vv->varint_get32(&cp, data + size, NULL);

    if (c->u.subexp.k >= 0 && cp - data == size)
        return c;

    hts_log_error("Malformed subexp header stream");
    free(c);
    return NULL;
}
/*
 * Decodes *out_size gamma-coded integers from the bit stream into out
 * (treated as an int32_t array).
 *
 * Each value is a run of nz zero bits followed by nz further bits that
 * are shifted into a value starting at 1.  The codec's offset is
 * subtracted before storing.
 *
 * Returns 0 on success, -1 if the input runs out.
 */
int cram_gamma_decode(cram_slice *slice, cram_codec *c, cram_block *in, char *out, int *out_size) {
    int32_t *dest = (int32_t *)out;
    const int total = *out_size;
    int pos;

    for (pos = 0; pos < total; pos++) {
        int nz = get_zero_bits_MSB(in);
        if (cram_not_enough_bits(in, nz))
            return -1;

        int val = 1;
        for (; nz > 0; nz--)
            GET_BIT_MSB(in, val);

        dest[pos] = val - c->u.gamma.offset;
    }

    return 0;
}
/*
 * Releases a GAMMA decoder.  A NULL codec is harmless because free()
 * ignores NULL.
 */
void cram_gamma_decode_free(cram_codec *c) {
    free(c);
}
/*
 * Appends a textual description of the GAMMA parameters to ks.
 *
 * The offset is read from the gamma view of the codec union, which is
 * where the gamma init and decode functions store and use it.
 *
 * Returns 0 on success, -1 if the formatted append fails.
 */
int cram_gamma_describe(cram_codec *c, kstring_t *ks) {
    return ksprintf(ks, "GAMMA(offset=%d)", c->u.gamma.offset)
        < 0 ? -1 : 0;
}
/*
 * Builds a GAMMA decoder from its serialised parameters: a single offset
 * value that must occupy exactly 'size' bytes.
 *
 * Only E_INT series are supported.
 *
 * Returns the new codec, or NULL on unsupported option, allocation
 * failure, or malformed header.
 */
cram_codec *cram_gamma_decode_init(cram_block_compression_hdr *hdr,
                                   char *data, int size,
                                   enum cram_encoding codec,
                                   enum cram_external_type option,
                                   int version, varint_vec *vv) {
    char *cp = data;
    cram_codec *c = NULL;

    if (option != E_INT) {
        hts_log_error("This codec only supports INT encodings");
        return NULL;
    }

    /* An empty header cannot contain the offset. */
    if (size < 1)
        goto malformed;

    c = malloc(sizeof(*c));
    if (c == NULL)
        return NULL;

    c->codec = E_GAMMA;
    c->decode = cram_gamma_decode;
    c->free = cram_gamma_decode_free;
    c->describe = cram_gamma_describe;

    c->u.gamma.offset = vv->varint_get32(&cp, data+size, NULL);

    if (cp - data == size)
        return c;

 malformed:
    hts_log_error("Malformed gamma header stream");
    free(c);
    return NULL;
}
/*
 * qsort() comparator for cram_huffman_code entries: orders by code length
 * first and by symbol value second.
 */
static int code_sort(const void *vp1, const void *vp2) {
    const cram_huffman_code *lhs = (const cram_huffman_code *)vp1;
    const cram_huffman_code *rhs = (const cram_huffman_code *)vp2;

    if (lhs->len != rhs->len)
        return lhs->len - rhs->len;

    if (lhs->symbol < rhs->symbol)
        return -1;

    return lhs->symbol > rhs->symbol ? 1 : 0;
}
/*
 * Releases a HUFFMAN decoder together with its code table.
 * A NULL codec is ignored.
 */
void cram_huffman_decode_free(cram_codec *c) {
    if (c == NULL)
        return;

    /* free() accepts NULL, so an absent table needs no special case. */
    free(c->u.huffman.codes);
    free(c);
}
/*
 * Decode function that always fails with -1 and produces no output.
 */
int cram_huffman_decode_null(cram_slice *slice, cram_codec *c,
                             cram_block *in, char *out,
                             int *out_size) {
    return -1;
}
/*
 * Byte decoder for a table whose first code has length 0: no bits are
 * consumed and every output byte is the first table symbol.
 *
 * A NULL out is accepted and simply skipped.  Always returns 0.
 */
int cram_huffman_decode_char0(cram_slice *slice, cram_codec *c,
                              cram_block *in, char *out, int *out_size) {
    int pos, count;

    if (out == NULL)
        return 0;

    count = *out_size;
    for (pos = 0; pos < count; pos++)
        out[pos] = c->u.huffman.codes[0].symbol;

    return 0;
}
/*
 * Decodes *out_size huffman-coded symbols from the bit stream into out,
 * one byte per symbol.  A NULL out consumes the bits but stores nothing.
 *
 * Bits are read in batches: just enough to reach the length of the current
 * candidate entry, after which (value - p) selects the next candidate
 * until one matches on both code and length.
 *
 * Returns 0 on success, -1 on truncated input or an invalid code.
 */
int cram_huffman_decode_char(cram_slice *slice, cram_codec *c,
                             cram_block *in, char *out, int *out_size) {
    const cram_huffman_code * const codes = c->u.huffman.codes;
    const int ncodes = c->u.huffman.ncodes;
    const int nsyms = *out_size;
    int s;

    for (s = 0; s < nsyms; s++) {
        int idx = 0, val = 0, len = 0;

        for (;;) {
            /* Extra bits needed to reach this candidate's code length. */
            int extra = codes[idx].len - len;
            if (cram_not_enough_bits(in, extra))
                return -1;

            len += extra;
            for (; extra; extra--)
                GET_BIT_MSB(in, val);

            idx = val - codes[idx].p;
            if (idx < 0 || idx >= ncodes)
                return -1;

            if (codes[idx].code == val && codes[idx].len == len) {
                if (out)
                    out[s] = codes[idx].symbol;
                break;
            }
        }
    }

    return 0;
}
/*
 * int32 decoder for a table whose first code has length 0: no bits are
 * consumed and every output value is the first table symbol.
 *
 * Always returns 0.
 */
int cram_huffman_decode_int0(cram_slice *slice, cram_codec *c,
                             cram_block *in, char *out, int *out_size) {
    const cram_huffman_code * const codes = c->u.huffman.codes;
    int32_t *dest = (int32_t *)out;
    const int count = *out_size;
    int pos;

    for (pos = 0; pos < count; pos++)
        dest[pos] = codes[0].symbol;

    return 0;
}
/*
 * Decodes *out_size huffman-coded symbols from the bit stream into out,
 * treated as an int32_t array.
 *
 * Bits are read in batches: just enough to reach the length of the current
 * candidate entry, after which (value - p) selects the next candidate
 * until one matches on both code and length.
 *
 * Returns 0 on success, -1 on truncated input or an invalid code.
 */
int cram_huffman_decode_int(cram_slice *slice, cram_codec *c,
                            cram_block *in, char *out, int *out_size) {
    const cram_huffman_code * const codes = c->u.huffman.codes;
    int32_t *dest = (int32_t *)out;
    const int ncodes = c->u.huffman.ncodes;
    const int nsyms = *out_size;
    int s;

    for (s = 0; s < nsyms; s++) {
        int idx = 0, val = 0, len = 0;

        for (;;) {
            /* Extra bits needed to reach this candidate's code length. */
            int extra = codes[idx].len - len;
            if (cram_not_enough_bits(in, extra))
                return -1;

            len += extra;
            for (; extra; extra--)
                GET_BIT_MSB(in, val);

            idx = val - codes[idx].p;
            if (idx < 0 || idx >= ncodes)
                return -1;

            if (codes[idx].code == val && codes[idx].len == len) {
                dest[s] = codes[idx].symbol;
                break;
            }
        }
    }

    return 0;
}
/*
 * int64 decoder for a table whose first code has length 0: no bits are
 * consumed and every output value is the first table symbol.
 *
 * Always returns 0.
 */
int cram_huffman_decode_long0(cram_slice *slice, cram_codec *c,
                              cram_block *in, char *out, int *out_size) {
    const cram_huffman_code * const codes = c->u.huffman.codes;
    int64_t *dest = (int64_t *)out;
    const int count = *out_size;
    int pos;

    for (pos = 0; pos < count; pos++)
        dest[pos] = codes[0].symbol;

    return 0;
}
/*
 * Decodes *out_size huffman-coded symbols from the bit stream into out,
 * treated as an int64_t array.
 *
 * Bits are read in batches: just enough to reach the length of the current
 * candidate entry, after which (value - p) selects the next candidate
 * until one matches on both code and length.
 *
 * Returns 0 on success, -1 on truncated input or an invalid code.
 */
int cram_huffman_decode_long(cram_slice *slice, cram_codec *c,
                             cram_block *in, char *out, int *out_size) {
    const cram_huffman_code * const codes = c->u.huffman.codes;
    int64_t *dest = (int64_t *)out;
    const int ncodes = c->u.huffman.ncodes;
    const int nsyms = *out_size;
    int s;

    for (s = 0; s < nsyms; s++) {
        int idx = 0, val = 0, len = 0;

        for (;;) {
            /* Extra bits needed to reach this candidate's code length. */
            int extra = codes[idx].len - len;
            if (cram_not_enough_bits(in, extra))
                return -1;

            len += extra;
            for (; extra; extra--)
                GET_BIT_MSB(in, val);

            idx = val - codes[idx].p;
            if (idx < 0 || idx >= ncodes)
                return -1;

            if (codes[idx].code == val && codes[idx].len == len) {
                dest[s] = codes[idx].symbol;
                break;
            }
        }
    }

    return 0;
}
/*
 * Appends a textual description of the huffman table (symbols, then code
 * lengths) to ks.
 *
 * Returns 0 on success and non-zero if any append failed.  Every
 * ksprintf() result is reduced to a "failed" flag before being
 * accumulated, so the number of bytes written cannot be mistaken for an
 * error.
 */
int cram_huffman_describe(cram_codec *c, kstring_t *ks) {
    int r = 0, n;

    r |= ksprintf(ks, "HUFFMAN(codes={") < 0;
    for (n = 0; n < c->u.huffman.ncodes; n++) {
        r |= ksprintf(ks, "%s%"PRId64, n?",":"",
                      c->u.huffman.codes[n].symbol) < 0;
    }

    r |= ksprintf(ks, "},lengths={") < 0;
    for (n = 0; n < c->u.huffman.ncodes; n++) {
        r |= ksprintf(ks, "%s%d", n?",":"",
                      c->u.huffman.codes[n].len) < 0;
    }

    r |= ksprintf(ks, "})") < 0;

    return r;
}
/*
 * Builds a HUFFMAN decoder from its serialised parameters.
 *
 * The header is read as: number of symbols, the symbols, a second count
 * (which must equal the first), then one code length per symbol.  The
 * entries are sorted by (length, symbol), given consecutive code values in
 * that order, and a decode function is chosen from the series type and
 * from whether the first sorted code has length 0.
 *
 * A header with zero symbols yields a codec whose decode function always
 * fails.
 *
 * Returns the new codec, or NULL on unsupported option, allocation
 * failure, or malformed header.
 */
cram_codec *cram_huffman_decode_init(cram_block_compression_hdr *hdr,
char *data, int size,
enum cram_encoding codec,
enum cram_external_type option,
int version, varint_vec *vv) {
int32_t ncodes = 0, i, j;
char *cp = data, *data_end = &data[size];
cram_codec *h;
cram_huffman_code *codes = NULL;
int32_t val, last_len, max_len = 0;
/* max_val is unsigned so it can hold (1U << len) - 1 for the largest
 * code length accepted below, which is one bit less than val's width. */
uint32_t max_val; const int max_code_bits = sizeof(val) * 8 - 1;
int err = 0;
if (option == E_BYTE_ARRAY_BLOCK) {
hts_log_error("BYTE_ARRAYs not supported by this codec");
return NULL;
}
ncodes = vv->varint_get32(&cp, data_end, &err);
if (ncodes < 0) {
hts_log_error("Invalid number of symbols in huffman stream");
return NULL;
}
/* Keep ncodes * sizeof(*codes) from overflowing in the malloc below. */
if (ncodes >= SIZE_MAX / sizeof(*codes)) {
errno = ENOMEM;
return NULL;
}
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
/* Extra allocation cap applied only in fuzzing builds. */
if (ncodes > FUZZ_ALLOC_LIMIT / sizeof(*codes)) {
errno = ENOMEM;
return NULL;
}
#endif
h = calloc(1, sizeof(*h));
if (!h)
return NULL;
h->codec = E_HUFFMAN;
h->free = cram_huffman_decode_free;
h->u.huffman.ncodes = ncodes;
h->u.huffman.option = option;
if (ncodes) {
codes = h->u.huffman.codes = malloc(ncodes * sizeof(*codes));
if (!codes) {
free(h);
return NULL;
}
} else {
codes = h->u.huffman.codes = NULL;
}
/* Read the symbols: 64-bit varints for E_LONG, 32-bit for E_INT and
 * E_BYTE.  Any other option is rejected as malformed. */
if (option == E_LONG) {
for (i = 0; i < ncodes; i++)
codes[i].symbol = vv->varint_get64(&cp, data_end, &err);
} else if (option == E_INT || option == E_BYTE) {
for (i = 0; i < ncodes; i++)
codes[i].symbol = vv->varint_get32(&cp, data_end, &err);
} else {
goto malformed;
}
if (err)
goto malformed;
/* The number of lengths must match the number of symbols. */
i = vv->varint_get32(&cp, data_end, &err);
if (i != ncodes)
goto malformed;
if (ncodes == 0) {
/* Empty table: install the decode function that always returns -1. */
h->decode = cram_huffman_decode_null;
return h;
}
/* Read the code lengths, tracking the largest one seen. */
for (i = 0; i < ncodes; i++) {
codes[i].len = vv->varint_get32(&cp, data_end, &err);
if (err)
break;
if (codes[i].len < 0) {
hts_log_error("Huffman code length (%d) is negative", codes[i].len);
goto malformed;
}
if (max_len < codes[i].len)
max_len = codes[i].len;
}
/* The header must be consumed exactly, and the longest code must be
 * shorter than the number of codes. */
if (err || cp - data != size || max_len >= ncodes)
goto malformed;
if (max_len > max_code_bits) {
hts_log_error("Huffman code length (%d) is greater "
"than maximum supported (%d)", max_len, max_code_bits);
goto malformed;
}
/* Order by code length, then by symbol value (see code_sort). */
qsort(codes, ncodes, sizeof(*codes), code_sort);
/* Assign code values in sorted order: each entry takes the previous
 * value plus one, shifted left whenever the length grows.  A value that
 * no longer fits in the current length marks the table as malformed. */
val = -1, last_len = 0, max_val = 0;
for (i = 0; i < ncodes; i++) {
val++;
if (val > max_val)
goto malformed;
if (codes[i].len > last_len) {
val <<= (codes[i].len - last_len);
last_len = codes[i].len;
max_val = (1U << codes[i].len) - 1;
}
codes[i].code = val;
}
/* For every entry, p is (code - index) of the first entry sharing its
 * length; the decode functions compute the next index as val - p. */
last_len = 0;
for (i = j = 0; i < ncodes; i++) {
if (codes[i].len > last_len) {
j = codes[i].code - i;
last_len = codes[i].len;
}
codes[i].p = j;
}
/* Pick the decoder by series type; the "0" variants are used when the
 * first sorted code has length 0. */
if (option == E_BYTE || option == E_BYTE_ARRAY) {
if (h->u.huffman.codes[0].len == 0)
h->decode = cram_huffman_decode_char0;
else
h->decode = cram_huffman_decode_char;
} else if (option == E_LONG || option == E_SLONG) {
if (h->u.huffman.codes[0].len == 0)
h->decode = cram_huffman_decode_long0;
else
h->decode = cram_huffman_decode_long;
} else if (option == E_INT || option == E_SINT || option == E_BYTE) {
if (h->u.huffman.codes[0].len == 0)
h->decode = cram_huffman_decode_int0;
else
h->decode = cram_huffman_decode_int;
} else {
/* NOTE(review): this return does not free h or codes.  The symbol
 * reading above already rejects every option other than E_LONG, E_INT
 * and E_BYTE, so this branch looks unreachable - confirm. */
return NULL;
}
h->describe = cram_huffman_describe;
return (cram_codec *)h;
malformed:
hts_log_error("Malformed huffman header stream");
free(codes);
free(h);
return NULL;
}
/*
 * Byte encoder selected when the first code has length 0: nothing is
 * written.  Always returns 0.
 */
int cram_huffman_encode_char0(cram_slice *slice, cram_codec *c,
                              char *in, int in_size) {
    return 0;
}
/*
 * Huffman-encodes in_size bytes from 'in' into c->out.
 *
 * Symbols in [-1, MAX_HUFF) are looked up through the val2code table; any
 * other symbol falls back to a linear search of the code table.
 *
 * Returns the OR of the bit-store results, or -1 immediately if a symbol
 * has no code.
 */
int cram_huffman_encode_char(cram_slice *slice, cram_codec *c,
                             char *in, int in_size) {
    unsigned char *next = (unsigned char *)in;
    int i, r = 0;

    while (in_size--) {
        int sym = *next++;

        if (sym >= -1 && sym < MAX_HUFF) {
            /* Direct lookup. */
            i = c->u.e_huffman.val2code[sym+1];
            assert(c->u.e_huffman.codes[i].symbol == sym);
        } else {
            /* Linear search. */
            for (i = 0; i < c->u.e_huffman.nvals; i++) {
                if (c->u.e_huffman.codes[i].symbol == sym)
                    break;
            }
            if (i == c->u.e_huffman.nvals)
                return -1;
        }

        int code = c->u.e_huffman.codes[i].code;
        int len = c->u.e_huffman.codes[i].len;

        r |= store_bits_MSB(c->out, code, len);
    }

    return r;
}
/*
 * int encoder selected when the first code has length 0: nothing is
 * written.  Always returns 0.
 */
int cram_huffman_encode_int0(cram_slice *slice, cram_codec *c,
                             char *in, int in_size) {
    return 0;
}
/*
 * Huffman-encodes in_size values from 'in' (treated as an int array) into
 * c->out.
 *
 * Symbols in [-1, MAX_HUFF) are looked up through the val2code table; any
 * other symbol falls back to a linear search of the code table.
 *
 * Returns the OR of the bit-store results, or -1 immediately if a symbol
 * has no code.
 */
int cram_huffman_encode_int(cram_slice *slice, cram_codec *c,
                            char *in, int in_size) {
    int *next = (int *)in;
    int i, r = 0;

    while (in_size--) {
        int sym = *next++;

        if (sym >= -1 && sym < MAX_HUFF) {
            /* Direct lookup. */
            i = c->u.e_huffman.val2code[sym+1];
            assert(c->u.e_huffman.codes[i].symbol == sym);
        } else {
            /* Linear search. */
            for (i = 0; i < c->u.e_huffman.nvals; i++) {
                if (c->u.e_huffman.codes[i].symbol == sym)
                    break;
            }
            if (i == c->u.e_huffman.nvals)
                return -1;
        }

        int code = c->u.e_huffman.codes[i].code;
        int len = c->u.e_huffman.codes[i].len;

        r |= store_bits_MSB(c->out, code, len);
    }

    return r;
}
/*
 * int64 encoder selected when the first code has length 0: nothing is
 * written.  Always returns 0.
 */
int cram_huffman_encode_long0(cram_slice *slice, cram_codec *c,
                              char *in, int in_size) {
    return 0;
}
/*
 * Huffman-encodes in_size values from 'in' (treated as an int64_t array)
 * into c->out.
 *
 * Symbols in [-1, MAX_HUFF) are looked up through the val2code table; any
 * other symbol falls back to a linear search of the code table.
 *
 * The symbol is kept at full 64-bit width so that a value outside the int
 * range cannot be truncated into, and silently encoded as, a different
 * symbol.
 *
 * Returns the OR of the bit-store results, or -1 immediately if a symbol
 * has no code.
 */
int cram_huffman_encode_long(cram_slice *slice, cram_codec *c,
                             char *in, int in_size) {
    int i, code, len, r = 0;
    int64_t *syms = (int64_t *)in;

    while (in_size--) {
        int64_t sym = *syms++;

        if (sym >= -1 && sym < MAX_HUFF) {
            i = c->u.e_huffman.val2code[sym+1];
            assert(c->u.e_huffman.codes[i].symbol == sym);
            code = c->u.e_huffman.codes[i].code;
            len = c->u.e_huffman.codes[i].len;
        } else {
            for (i = 0; i < c->u.e_huffman.nvals; i++) {
                if (c->u.e_huffman.codes[i].symbol == sym)
                    break;
            }
            if (i == c->u.e_huffman.nvals)
                return -1;

            code = c->u.e_huffman.codes[i].code;
            len = c->u.e_huffman.codes[i].len;
        }

        r |= store_bits_MSB(c->out, code, len);
    }

    return r;
}
/*
 * Releases a HUFFMAN encoder together with its code table.
 * A NULL codec is ignored.
 */
void cram_huffman_encode_free(cram_codec *c) {
    if (c == NULL)
        return;

    /* free() accepts NULL, so an absent table needs no special case. */
    free(c->u.e_huffman.codes);
    free(c);
}
/*
 * Serialises the huffman encoding description into block b.
 *
 * Layout written: optional prefix, codec id, parameter length, then the
 * parameters: symbol count, the symbols (width and signedness chosen by
 * the stored option), the count again, and one code length per symbol.
 *
 * Returns the number of bytes accounted for, or -1 on failure.  The
 * scratch buffer is released on every path.
 */
int cram_huffman_encode_store(cram_codec *c, cram_block *b, char *prefix,
                              int version) {
    int i, len = 0, r = 0, n, ret = -1;
    cram_huffman_code *codes = c->u.e_huffman.codes;
    char *tmp = malloc(6*c->u.e_huffman.nvals+16);
    char *tp, *tpend;

    if (!tmp)
        return -1;

    /* Only form pointers into the buffer once it is known to exist. */
    tp = tmp;
    tpend = tmp+6*c->u.e_huffman.nvals+16;

    if (prefix) {
        size_t l = strlen(prefix);
        BLOCK_APPEND(b, prefix, l);
        len += l;
    }

    tp += c->vv->varint_put32(tp, tpend, c->u.e_huffman.nvals);
    if (c->u.e_huffman.option == E_LONG) {
        for (i = 0; i < c->u.e_huffman.nvals; i++) {
            tp += c->vv->varint_put64(tp, tpend, codes[i].symbol);
        }
    } else if (c->u.e_huffman.option == E_SLONG) {
        for (i = 0; i < c->u.e_huffman.nvals; i++) {
            tp += c->vv->varint_put64s(tp, tpend, codes[i].symbol);
        }
    } else if (c->u.e_huffman.option == E_INT || c->u.e_huffman.option == E_BYTE) {
        for (i = 0; i < c->u.e_huffman.nvals; i++) {
            tp += c->vv->varint_put32(tp, tpend, codes[i].symbol);
        }
    } else if (c->u.e_huffman.option == E_SINT) {
        for (i = 0; i < c->u.e_huffman.nvals; i++) {
            tp += c->vv->varint_put32s(tp, tpend, codes[i].symbol);
        }
    } else {
        /* Unsupported series type. */
        goto block_err;
    }

    tp += c->vv->varint_put32(tp, tpend, c->u.e_huffman.nvals);
    for (i = 0; i < c->u.e_huffman.nvals; i++)
        tp += c->vv->varint_put32(tp, tpend, codes[i].len);

    len += (n = c->vv->varint_put32_blk(b, c->codec)); r |= n;
    len += (n = c->vv->varint_put32_blk(b, tp-tmp)); r |= n;
    BLOCK_APPEND(b, tmp, tp-tmp);
    len += tp-tmp;

    if (r > 0)
        ret = len;

 block_err:
    free(tmp);
    return ret;
}
/*
 * Creates a HUFFMAN encoder from symbol frequency statistics.
 *
 * Symbols and counts are gathered from st->freqs[] and, when present, the
 * st->h hash.  Code lengths are derived by repeatedly merging the two
 * lowest-frequency live nodes, the entries are sorted by (length, symbol),
 * and code values are assigned in that order.  Symbols in [-1, MAX_HUFF)
 * are also entered into the val2code lookup table.
 *
 * Returns the new codec, or NULL on allocation failure or when 'option' is
 * not a supported series type.  Nothing is leaked on either failure path.
 */
cram_codec *cram_huffman_encode_init(cram_stats *st,
                                     enum cram_encoding codec,
                                     enum cram_external_type option,
                                     void *dat,
                                     int version, varint_vec *vv) {
    int *vals = NULL, *freqs = NULL, *lens = NULL, code, len;
    int *new_vals, *new_freqs;
    int i, k;
    size_t nvals, vals_alloc = 0;
    cram_codec *c;
    cram_huffman_code *codes;

    c = malloc(sizeof(*c));
    if (!c)
        return NULL;
    c->codec = E_HUFFMAN;

    /* Gather symbols held in the direct frequency array. */
    for (nvals = i = 0; i < MAX_STAT_VAL; i++) {
        if (!st->freqs[i])
            continue;
        if (nvals >= vals_alloc) {
            vals_alloc = vals_alloc ? vals_alloc*2 : 1024;
            new_vals = realloc(vals, vals_alloc * sizeof(int));
            if (!new_vals) goto nomem;
            vals = new_vals;
            new_freqs = realloc(freqs, vals_alloc * sizeof(int));
            if (!new_freqs) goto nomem;
            freqs = new_freqs;
        }
        vals[nvals] = i;
        freqs[nvals] = st->freqs[i];
        assert(st->freqs[i] > 0);
        nvals++;
    }

    /* Gather symbols held in the hash table, if there is one. */
    if (st->h) {
        khint_t hk;

        for (hk = kh_begin(st->h); hk != kh_end(st->h); hk++) {
            if (!kh_exist(st->h, hk))
                continue;
            if (nvals >= vals_alloc) {
                vals_alloc = vals_alloc ? vals_alloc*2 : 1024;
                new_vals = realloc(vals, vals_alloc * sizeof(int));
                if (!new_vals) goto nomem;
                vals = new_vals;
                new_freqs = realloc(freqs, vals_alloc * sizeof(int));
                if (!new_freqs) goto nomem;
                freqs = new_freqs;
            }
            vals[nvals]= kh_key(st->h, hk);
            freqs[nvals] = kh_val(st->h, hk);
            assert(freqs[nvals] > 0);
            nvals++;
        }
    }

    assert(nvals > 0);

    /* Room for the leaves plus the internal nodes created by merging. */
    new_freqs = realloc(freqs, 2*nvals*sizeof(*freqs));
    if (!new_freqs) goto nomem;
    freqs = new_freqs;
    lens = calloc(2*nvals, sizeof(*lens));
    if (!lens) goto nomem;

    /* Repeatedly merge the two lowest-frequency live nodes.  Merged nodes
     * are marked by negating their frequency, and lens[] temporarily holds
     * each node's parent index. */
    for (;;) {
        int low1 = INT_MAX, low2 = INT_MAX;
        int ind1 = 0, ind2 = 0;
        for (i = 0; i < nvals; i++) {
            if (freqs[i] < 0)
                continue;
            if (low1 > freqs[i])
                low2 = low1, ind2 = ind1, low1 = freqs[i], ind1 = i;
            else if (low2 > freqs[i])
                low2 = freqs[i], ind2 = i;
        }
        if (low2 == INT_MAX)
            break;

        freqs[nvals] = low1 + low2;
        lens[ind1] = nvals;
        lens[ind2] = nvals;
        freqs[ind1] *= -1;
        freqs[ind2] *= -1;
        nvals++;
    }
    /* Back to the number of leaves. */
    nvals = nvals/2+1;

    /* A leaf's code length is the number of parent links above it. */
    for (i = 0; i < nvals; i++) {
        int code_len = 0;
        for (k = lens[i]; k; k = lens[k])
            code_len++;
        lens[i] = code_len;
        freqs[i] *= -1;
    }

    if (!(codes = malloc(nvals * sizeof(*codes))))
        goto nomem;
    for (i = 0; i < nvals; i++) {
        codes[i].symbol = vals[i];
        codes[i].len = lens[i];
    }
    qsort(codes, nvals, sizeof(*codes), code_sort);

    /* Assign code values in sorted order: consecutive values within a
     * length, shifted left by one for each extra bit of length. */
    code = 0; len = codes[0].len;
    for (i = 0; i < nvals; i++) {
        while (len != codes[i].len) {
            code<<=1;
            len++;
        }
        codes[i].code = code++;

        if (codes[i].symbol >= -1 && codes[i].symbol < MAX_HUFF)
            c->u.e_huffman.val2code[codes[i].symbol+1] = i;
    }

    free(lens);
    free(vals);
    free(freqs);

    c->u.e_huffman.codes = codes;
    c->u.e_huffman.nvals = nvals;
    c->u.e_huffman.option = option;

    c->free = cram_huffman_encode_free;
    if (option == E_BYTE || option == E_BYTE_ARRAY) {
        if (c->u.e_huffman.codes[0].len == 0)
            c->encode = cram_huffman_encode_char0;
        else
            c->encode = cram_huffman_encode_char;
    } else if (option == E_INT || option == E_SINT) {
        if (c->u.e_huffman.codes[0].len == 0)
            c->encode = cram_huffman_encode_int0;
        else
            c->encode = cram_huffman_encode_int;
    } else if (option == E_LONG || option == E_SLONG) {
        if (c->u.e_huffman.codes[0].len == 0)
            c->encode = cram_huffman_encode_long0;
        else
            c->encode = cram_huffman_encode_long;
    } else {
        /* Unsupported series type: release what was built. */
        free(codes);
        free(c);
        return NULL;
    }
    c->store = cram_huffman_encode_store;
    c->flush = NULL;

    return c;

 nomem:
    hts_log_error("Out of memory");
    free(vals);
    free(freqs);
    free(lens);
    free(c);
    return NULL;
}
/*
 * Decodes one length-prefixed byte array.
 *
 * The length is decoded first with the length sub-codec; that many items
 * are then decoded with the value sub-codec into out, and *out_size is
 * set to the length.
 *
 * Returns the value sub-codec's result, or -1 if the length could not be
 * decoded, is negative, or there is no value sub-codec.
 */
int cram_byte_array_len_decode(cram_slice *slice, cram_codec *c,
                               cram_block *in, char *out,
                               int *out_size) {
    int32_t len = 0, one = 1;
    int r;

    r = c->u.byte_array_len.len_codec->decode(slice,
                                              c->u.byte_array_len.len_codec,
                                              in, (char *)&len, &one);

    if (r || !c->u.byte_array_len.val_codec || len < 0)
        return -1;

    r = c->u.byte_array_len.val_codec->decode(slice,
                                              c->u.byte_array_len.val_codec,
                                              in, out, &len);
    *out_size = len;

    return r;
}
/*
 * Releases a BYTE_ARRAY_LEN decoder and whichever of its length / value
 * sub-codecs exist.  A NULL codec is ignored.
 */
void cram_byte_array_len_decode_free(cram_codec *c) {
    if (c == NULL)
        return;

    cram_codec *lens = c->u.byte_array_len.len_codec;
    if (lens != NULL)
        lens->free(lens);

    cram_codec *vals = c->u.byte_array_len.val_codec;
    if (vals != NULL)
        vals->free(vals);

    free(c);
}
/*
 * Appends a textual description of the BYTE_ARRAY_LEN codec to ks,
 * including each sub-codec's own description, or "?" for a sub-codec
 * that has no describe function.
 *
 * Returns 0 on success; non-zero if any step reported failure.
 */
int cram_byte_array_len_describe(cram_codec *c, kstring_t *ks) {
    cram_byte_array_len_decoder *l = &c->u.byte_array_len;
    int r = 0;

    r |= ksprintf(ks, "BYTE_ARRAY_LEN(len_codec={") < 0;

    if (l->len_codec->describe)
        r |= l->len_codec->describe(l->len_codec, ks);
    else
        r |= (ksprintf(ks, "?")<0);

    r |= ksprintf(ks, "},val_codec={") < 0;

    if (l->val_codec->describe)
        r |= l->val_codec->describe(l->val_codec, ks);
    else
        r |= (ksprintf(ks, "?")<0);

    r |= ksprintf(ks, "}") < 0;

    return r;
}
/*
 * Builds a BYTE_ARRAY_LEN decoder from its serialised parameters.
 *
 * The header holds two sub-encodings back to back, each as: encoding id,
 * parameter length, parameters.  The first (lengths) is created as E_INT;
 * the second (values) uses the caller's option.  The whole of
 * data[0..size) must be consumed.
 *
 * Returns the new codec, or NULL on allocation failure, sub-codec
 * creation failure, or malformed header.  The "malformed" message is only
 * logged for structural problems, not for sub-codec creation failures.
 */
cram_codec *cram_byte_array_len_decode_init(cram_block_compression_hdr *hdr,
                                            char *data, int size,
                                            enum cram_encoding codec,
                                            enum cram_external_type option,
                                            int version, varint_vec *vv) {
    char *cp = data;
    char *endp = data + size;
    cram_codec *c = malloc(sizeof(*c));

    if (c == NULL)
        return NULL;

    c->codec = E_BYTE_ARRAY_LEN;
    c->decode = cram_byte_array_len_decode;
    c->free = cram_byte_array_len_decode_free;
    c->describe = cram_byte_array_len_describe;
    c->u.byte_array_len.len_codec = NULL;
    c->u.byte_array_len.val_codec = NULL;

    /* Length sub-encoding. */
    int enc = vv->varint_get32(&cp, endp, NULL);
    int sub_len = vv->varint_get32(&cp, endp, NULL);
    if (sub_len < 0 || endp - cp < sub_len)
        goto malformed;
    c->u.byte_array_len.len_codec = cram_decoder_init(hdr, enc, cp, sub_len,
                                                      E_INT, version, vv);
    if (!c->u.byte_array_len.len_codec)
        goto no_codec;
    cp += sub_len;

    /* Value sub-encoding. */
    enc = vv->varint_get32(&cp, endp, NULL);
    sub_len = vv->varint_get32(&cp, endp, NULL);
    if (sub_len < 0 || endp - cp < sub_len)
        goto malformed;
    c->u.byte_array_len.val_codec = cram_decoder_init(hdr, enc, cp, sub_len,
                                                      option, version, vv);
    if (!c->u.byte_array_len.val_codec)
        goto no_codec;
    cp += sub_len;

    if (cp - data == size)
        return c;

 malformed:
    hts_log_error("Malformed byte_array_len header stream");

 no_codec:
    cram_byte_array_len_decode_free(c);
    return NULL;
}
/*
 * Encodes one byte array: its length goes through the length sub-codec
 * (as a single 32-bit value), its 'in_size' bytes through the value
 * sub-codec.
 *
 * Returns the bitwise OR of the two sub-codec results (0 when both
 * returned 0).
 */
int cram_byte_array_len_encode(cram_slice *slice, cram_codec *c,
                               char *in, int in_size) {
    int32_t nbytes = in_size;

    cram_codec *len_codec = c->u.e_byte_array_len.len_codec;
    int status = len_codec->encode(slice, len_codec, (char *)&nbytes, 1);

    cram_codec *val_codec = c->u.e_byte_array_len.val_codec;
    status |= val_codec->encode(slice, val_codec, in, in_size);

    return status;
}
/*
 * Releases a BYTE_ARRAY_LEN encoder together with whichever of its two
 * sub-codecs are present.  A NULL codec is ignored.
 */
void cram_byte_array_len_encode_free(cram_codec *c) {
    cram_codec *sub;

    if (c == NULL)
        return;

    sub = c->u.e_byte_array_len.len_codec;
    if (sub != NULL)
        sub->free(sub);

    sub = c->u.e_byte_array_len.val_codec;
    if (sub != NULL)
        sub->free(sub);

    free(c);
}
/*
 * Serialises a BYTE_ARRAY_LEN encoder description into block 'b'.
 *
 * Output order: optional 'prefix' string, this codec's id, the combined
 * size of the two sub-codec descriptions, then the length sub-codec
 * description followed by the value sub-codec description.  Each
 * sub-codec is first stored into its own temporary block so the combined
 * size is known before anything is written to 'b'.
 *
 * Returns the number of bytes accounted for on success;
 *         -1 on failure.
 *
 * The temporary blocks are released exactly once, on a single exit path.
 * (Previously a failed varint write freed them and then fell through
 * into the error handler, which freed them a second time.)
 */
int cram_byte_array_len_encode_store(cram_codec *c, cram_block *b,
                                     char *prefix, int version) {
    int len = 0, len2, len3, r = 0, n;
    int ret = -1;
    cram_codec *tc;
    cram_block *b_len = NULL, *b_val = NULL;

    if (prefix) {
        size_t l = strlen(prefix);
        BLOCK_APPEND(b, prefix, l);
        len += l;
    }

    // Length sub-codec description, into a scratch block.
    tc = c->u.e_byte_array_len.len_codec;
    b_len = cram_new_block(0, 0);
    if (!b_len) goto block_err;
    len2 = tc->store(tc, b_len, NULL, version);
    if (len2 < 0) goto block_err;

    // Value sub-codec description, into a second scratch block.
    tc = c->u.e_byte_array_len.val_codec;
    b_val = cram_new_block(0, 0);
    if (!b_val) goto block_err;
    len3 = tc->store(tc, b_val, NULL, version);
    if (len3 < 0) goto block_err;

    len += (n = c->vv->varint_put32_blk(b, c->codec));  r |= n;
    len += (n = c->vv->varint_put32_blk(b, len2+len3)); r |= n;
    BLOCK_APPEND(b, BLOCK_DATA(b_len), BLOCK_SIZE(b_len));
    BLOCK_APPEND(b, BLOCK_DATA(b_val), BLOCK_SIZE(b_val));

    // Same success criterion as before; anything else stays at -1.
    if (r > 0)
        ret = len + len2 + len3;

 block_err:
    // Shared cleanup for both success and failure.
    if (b_len) cram_free_block(b_len);
    if (b_val) cram_free_block(b_val);
    return ret;
}
/*
 * Creates a BYTE_ARRAY_LEN encoder.
 *
 * 'dat' points at a cram_byte_array_len_encoder giving the encoding and
 * parameters for each of the two sub-codecs.  The length sub-codec is
 * created as E_INT using the supplied statistics 'st'; the value sub-codec
 * is created as E_BYTE_ARRAY with no statistics.
 *
 * Returns the new codec on success;
 *         NULL on allocation failure or if either sub-codec could not be
 *         created.
 */
cram_codec *cram_byte_array_len_encode_init(cram_stats *st,
                                            enum cram_encoding codec,
                                            enum cram_external_type option,
                                            void *dat,
                                            int version, varint_vec *vv) {
    cram_byte_array_len_encoder *params = (cram_byte_array_len_encoder *)dat;

    cram_codec *c = malloc(sizeof(*c));
    if (c == NULL)
        return NULL;

    c->codec  = E_BYTE_ARRAY_LEN;
    c->free   = cram_byte_array_len_encode_free;
    c->encode = cram_byte_array_len_encode;
    c->store  = cram_byte_array_len_encode_store;
    c->flush  = NULL;

    // Both sub-codecs are attempted before checking either result.
    cram_codec *len_codec = cram_encoder_init(params->len_encoding, st,
                                              E_INT, params->len_dat,
                                              version, vv);
    c->u.e_byte_array_len.len_codec = len_codec;

    cram_codec *val_codec = cram_encoder_init(params->val_encoding, NULL,
                                              E_BYTE_ARRAY, params->val_dat,
                                              version, vv);
    c->u.e_byte_array_len.val_codec = val_codec;

    if (len_codec && val_codec)
        return c;

    cram_byte_array_len_encode_free(c);
    return NULL;
}
/*
 * Decodes one stop-byte terminated array from the block identified by the
 * codec's content_id.
 *
 * Bytes from the block's current read position up to (not including) the
 * stop byte are copied to 'out' when 'out' is non-NULL; with a NULL 'out'
 * the input is consumed without producing output.  '*out_size' receives
 * the number of bytes before the stop byte and the block's read position
 * is moved to just after the stop byte.
 *
 * Returns 0 on success (also when the block is absent and *out_size is 0);
 *         -1 if the block is absent and *out_size is non-zero, the read
 *            position is already at or past the end of the block, or no
 *            stop byte exists within the block's uncompressed data.
 *
 * The search is bounded by uncomp_size, so no byte beyond the block's
 * data is ever examined.  (The earlier loop read *cp before checking the
 * bound, touching one byte past the end when the terminator was missing.)
 * On failure nothing is written to 'out'.
 */
static int cram_byte_array_stop_decode_char(cram_slice *slice, cram_codec *c,
                                            cram_block *in, char *out,
                                            int *out_size) {
    cram_block *b = cram_get_block_by_id(slice, c->u.byte_array_stop.content_id);
    if (!b)
        return *out_size?-1:0;

    if (b->idx >= b->uncomp_size)
        return -1;

    const unsigned char *start  = b->data + b->idx;
    const unsigned char *cp_end = b->data + b->uncomp_size;

    // memchr compares as unsigned char, matching a byte-wise equality test.
    const unsigned char *term =
        memchr(start, (unsigned char)c->u.byte_array_stop.stop,
               (size_t)(cp_end - start));
    if (!term)
        return -1;

    size_t n = (size_t)(term - start);
    if (out)
        memcpy(out, start, n);

    *out_size = (int)n;
    b->idx = (term - b->data) + 1;   // skip the stop byte itself

    return 0;
}
/*
 * Decodes one stop-byte terminated array from the block identified by the
 * codec's content_id, appending the bytes to a cram_block.
 *
 * 'out_' is really a cram_block pointer passed through the generic
 * char-pointer decode signature.  '*out_size' receives the number of bytes
 * found before the terminator, and the source block's read position is
 * moved to one past where the scan stopped.
 *
 * Note that both scan loops also stop at the end of the source block's
 * uncompressed data, and that case is not reported as an error here.
 *
 * Returns 0 on success (also when the block is absent and *out_size is 0);
 *         -1 if the block is absent and *out_size is non-zero, if the read
 *            position is already at or past the end of the block, or via
 *            the block_err label.
 */
int cram_byte_array_stop_decode_block(cram_slice *slice, cram_codec *c,
cram_block *in, char *out_,
int *out_size) {
cram_block *b;
cram_block *out = (cram_block *)out_;
unsigned char *cp, *cp_end;
unsigned char stop;
b = cram_get_block_by_id(slice, c->u.byte_array_stop.content_id);
if (!b)
return *out_size?-1:0;
if (b->idx >= b->uncomp_size)
return -1;
cp = b->data + b->idx;
cp_end = b->data + b->uncomp_size;
stop = c->u.byte_array_stop.stop;
// Fast path: everything left in the source is smaller than the spare
// room in 'out' (alloc - byte), so bytes are copied straight to the end
// of 'out' and its size is fixed up afterwards.
if (cp_end - cp < out->alloc - out->byte) {
unsigned char *out_cp = BLOCK_END(out);
while (cp != cp_end && *cp != stop)
*out_cp++ = *cp++;
BLOCK_SIZE(out) = out_cp - BLOCK_DATA(out);
} else {
// Slow path: find the extent first, then append it in one go.
unsigned char *cp_start;
for (cp_start = cp; cp != cp_end && *cp != stop; cp++)
;
BLOCK_APPEND(out, cp_start, cp - cp_start);
// NOTE(review): growing after the append looks redundant - confirm
// against the BLOCK_APPEND / BLOCK_GROW macro definitions.
BLOCK_GROW(out, cp - cp_start);
}
*out_size = cp - (b->data + b->idx);
// +1 steps over the stop byte.
b->idx = cp - b->data + 1;
return 0;
// No explicit goto targets this label in this function; presumably the
// BLOCK_* macros jump here on failure - verify against their definitions.
block_err:
return -1;
}
/*
 * Releases a BYTE_ARRAY_STOP decoder.  The codec owns no other
 * allocations; a NULL codec is accepted.
 */
void cram_byte_array_stop_decode_free(cram_codec *c) {
    free(c);   // free(NULL) is a no-op, so no separate NULL test is needed
}
/*
 * Appends a textual description of a BYTE_ARRAY_STOP decoder (stop value
 * and content id) to 'ks'.
 *
 * Returns 0 on success;
 *         -1 if the formatted append failed.
 */
int cram_byte_array_stop_describe(cram_codec *c, kstring_t *ks) {
    int n = ksprintf(ks, "BYTE_ARRAY_STOP(stop=%d,id=%d)",
                     c->u.byte_array_stop.stop,
                     c->u.byte_array_stop.content_id);
    if (n < 0)
        return -1;
    return 0;
}
/*
 * Builds a BYTE_ARRAY_STOP decoder from its serialised parameters.
 *
 * Layout of 'data': one stop byte followed by the content id.  For major
 * version 1 the id is four bytes, least significant first (so at least 5
 * bytes are required); otherwise it is a varint (at least 2 bytes
 * required).  The parameters must consume exactly 'size' bytes.
 *
 * Only E_BYTE_ARRAY and E_BYTE_ARRAY_BLOCK are accepted for 'option'.
 *
 * Returns the new codec on success;
 *         NULL on allocation failure, unsupported 'option', or malformed
 *         input.
 */
cram_codec *cram_byte_array_stop_decode_init(cram_block_compression_hdr *hdr,
                                             char *data, int size,
                                             enum cram_encoding codec,
                                             enum cram_external_type option,
                                             int version, varint_vec *vv) {
    cram_codec *c = NULL;
    unsigned char *cp = (unsigned char *)data;
    int err = 0;
    const int is_v1 = (CRAM_MAJOR_VERS(version) == 1);
    const int min_size = is_v1 ? 5 : 2;

    if (size < min_size)
        goto malformed;

    c = malloc(sizeof(*c));
    if (c == NULL)
        return NULL;

    c->codec = E_BYTE_ARRAY_STOP;

    if (option == E_BYTE_ARRAY_BLOCK) {
        c->decode = cram_byte_array_stop_decode_block;
    } else if (option == E_BYTE_ARRAY) {
        c->decode = cram_byte_array_stop_decode_char;
    } else {
        hts_log_error("The byte_array_stop codec only supports BYTE_ARRAYs");
        free(c);
        return NULL;
    }

    c->free     = cram_byte_array_stop_decode_free;
    c->describe = cram_byte_array_stop_describe;

    c->u.byte_array_stop.stop = *cp++;

    if (is_v1) {
        // Fixed-width id, least significant byte first.
        c->u.byte_array_stop.content_id =
            cp[0] + (cp[1]<<8) + (cp[2]<<16) + ((unsigned int) cp[3]<<24);
        cp += 4;
    } else {
        c->u.byte_array_stop.content_id =
            vv->varint_get32((char **)&cp, data+size, &err);
    }

    if (err || (char *)cp - data != size)
        goto malformed;

    return c;

 malformed:
    hts_log_error("Malformed byte_array_stop header stream");
    free(c);
    return NULL;
}
/*
 * Encodes one byte array by appending its 'in_size' bytes, followed by the
 * codec's stop byte, to the codec's output block.
 *
 * Returns 0 on success;
 *         -1 if control reaches the block_err label.
 */
int cram_byte_array_stop_encode(cram_slice *slice, cram_codec *c,
                                char *in, int in_size) {
    int status = -1;

    BLOCK_APPEND(c->out, in, in_size);
    BLOCK_APPEND_CHAR(c->out, c->u.e_byte_array_stop.stop);
    status = 0;

 block_err:
    return status;
}
/*
 * Releases a BYTE_ARRAY_STOP encoder.  The codec owns no other
 * allocations; a NULL codec is accepted.
 */
void cram_byte_array_stop_encode_free(cram_codec *c) {
    free(c);   // free(NULL) is a no-op, so no separate NULL test is needed
}
/*
 * Serialises a BYTE_ARRAY_STOP encoder description into block 'b'.
 *
 * Output order: optional 'prefix' string, codec id, parameter byte count,
 * stop byte, content id.  For major version 1 the content id is written
 * as four bytes, least significant first, with a parameter byte count of
 * 5; otherwise it is written as a varint and the byte count is one plus
 * the varint's size.
 *
 * Returns the number of bytes appended (prefix included) on success;
 *         -1 if control reaches the block_err label.
 */
int cram_byte_array_stop_encode_store(cram_codec *c, cram_block *b,
                                      char *prefix, int version) {
    char scratch[20];
    char *wp = scratch;
    char *scratch_end = scratch + sizeof(scratch);
    int total = 0;

    if (prefix) {
        size_t prefix_len = strlen(prefix);
        BLOCK_APPEND(b, prefix, prefix_len);
        total += prefix_len;
    }

    wp += c->vv->varint_put32(wp, scratch_end, c->codec);

    if (CRAM_MAJOR_VERS(version) == 1) {
        int shift;

        wp += c->vv->varint_put32(wp, scratch_end, 5);
        *wp++ = c->u.e_byte_array_stop.stop;
        for (shift = 0; shift < 32; shift += 8)
            *wp++ = (c->u.e_byte_array_stop.content_id >> shift) & 0xff;
    } else {
        wp += c->vv->varint_put32(wp, scratch_end, 1 +
                                  c->vv->varint_size(c->u.e_byte_array_stop.content_id));
        *wp++ = c->u.e_byte_array_stop.stop;
        wp += c->vv->varint_put32(wp, scratch_end,
                                  c->u.e_byte_array_stop.content_id);
    }

    BLOCK_APPEND(b, scratch, wp-scratch);
    total += wp-scratch;

    return total;

 block_err:
    return -1;
}
/*
 * Creates a BYTE_ARRAY_STOP encoder.
 *
 * 'dat' is read as an array of two ints: element 0 is the stop value,
 * element 1 the content id.
 *
 * Returns the new codec on success;
 *         NULL on allocation failure.
 */
cram_codec *cram_byte_array_stop_encode_init(cram_stats *st,
                                             enum cram_encoding codec,
                                             enum cram_external_type option,
                                             void *dat,
                                             int version, varint_vec *vv) {
    int *params = (int *)dat;

    cram_codec *c = malloc(sizeof(*c));
    if (c == NULL)
        return NULL;

    c->codec  = E_BYTE_ARRAY_STOP;
    c->free   = cram_byte_array_stop_encode_free;
    c->encode = cram_byte_array_stop_encode;
    c->store  = cram_byte_array_stop_encode_store;
    c->flush  = NULL;

    c->u.e_byte_array_stop.stop       = params[0];
    c->u.e_byte_array_stop.content_id = params[1];

    return c;
}
/*
 * Maps an encoding enumerator to a constant, human-readable name.
 *
 * Returns a pointer to a string literal; "?" for E_NUM_CODECS and for any
 * value not listed below.
 */
const char *cram_encoding2str(enum cram_encoding t) {
    const char *name = "?";

    switch (t) {
    case E_NULL:            name = "NULL";            break;
    case E_EXTERNAL:        name = "EXTERNAL";        break;
    case E_GOLOMB:          name = "GOLOMB";          break;
    case E_HUFFMAN:         name = "HUFFMAN";         break;
    case E_BYTE_ARRAY_LEN:  name = "BYTE_ARRAY_LEN";  break;
    case E_BYTE_ARRAY_STOP: name = "BYTE_ARRAY_STOP"; break;
    case E_BETA:            name = "BETA";            break;
    case E_SUBEXP:          name = "SUBEXP";          break;
    case E_GOLOMB_RICE:     name = "GOLOMB_RICE";     break;
    case E_GAMMA:           name = "GAMMA";           break;

    case E_VARINT_UNSIGNED: name = "VARINT_UNSIGNED"; break;
    case E_VARINT_SIGNED:   name = "VARINT_SIGNED";   break;
    case E_CONST_BYTE:      name = "CONST_BYTE";      break;
    case E_CONST_INT:       name = "CONST_INT";       break;

    case E_NUM_CODECS:
    default:
        break;
    }

    return name;
}
/*
 * Table of decoder constructors.
 *
 * cram_decoder_init() indexes this array directly with the numeric value
 * of the requested enum cram_encoding, so an entry's position in the
 * initialiser is the codec number it serves.  A NULL entry means no
 * decoder is available for that number.  The table has 54 entries
 * (indices 0-53).
 */
static cram_codec *(*decode_init[])(cram_block_compression_hdr *hdr,
char *data,
int size,
enum cram_encoding codec,
enum cram_external_type option,
int version, varint_vec *vv) = {
// Indices 0-1
NULL, cram_external_decode_init,
// Indices 2-3
NULL, cram_huffman_decode_init,
// Index 4
cram_byte_array_len_decode_init,
// Index 5
cram_byte_array_stop_decode_init,
// Index 6
cram_beta_decode_init,
// Index 7
cram_subexp_decode_init,
// Indices 8-9
NULL, cram_gamma_decode_init,
// Indices 10-39: unused
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
// Indices 40-44: the varint constructor serves both 41 and 42, the
// const constructor both 43 and 44
NULL, cram_varint_decode_init, cram_varint_decode_init, cram_const_decode_init, cram_const_decode_init,
// Indices 45-49: unused
NULL, NULL, NULL, NULL, NULL,
// Indices 50-51
NULL, cram_xpack_decode_init,
// Index 52
cram_xrle_decode_init,
// Index 53
cram_xdelta_decode_init,
};
/*
 * Creates a decoder for 'codec' by dispatching to the matching
 * constructor in decode_init[].
 *
 * On success the new codec records 'vv' and is given the next sequential
 * codec id from 'hdr' (hdr->ncodecs is incremented).
 *
 * Returns the new codec on success;
 *         NULL if the codec number is out of range or has no constructor
 *         (an error is logged), or if the constructor itself failed.
 */
cram_codec *cram_decoder_init(cram_block_compression_hdr *hdr,
                              enum cram_encoding codec,
                              char *data, int size,
                              enum cram_external_type option,
                              int version, varint_vec *vv) {
    // Range is tested first so the table is never indexed out of bounds.
    if (!(codec >= E_NULL && codec < E_NUM_CODECS) || !decode_init[codec]) {
        hts_log_error("Unimplemented codec of type %s", cram_encoding2str(codec));
        return NULL;
    }

    cram_codec *dec = decode_init[codec](hdr, data, size, codec,
                                         option, version, vv);
    if (dec == NULL)
        return NULL;

    dec->vv = vv;
    dec->codec_id = hdr->ncodecs++;
    return dec;
}
/*
 * Table of encoder constructors.
 *
 * cram_encoder_init() indexes this array directly with the numeric value
 * of the requested enum cram_encoding, so an entry's position in the
 * initialiser is the codec number it serves.  A NULL entry means no
 * encoder is available for that number.  The table has 54 entries
 * (indices 0-53).
 */
static cram_codec *(*encode_init[])(cram_stats *stx,
enum cram_encoding codec,
enum cram_external_type option,
void *opt,
int version, varint_vec *vv) = {
// Indices 0-3
NULL, cram_external_encode_init, NULL, cram_huffman_encode_init,
// Index 4
cram_byte_array_len_encode_init,
// Index 5
cram_byte_array_stop_encode_init,
// Index 6
cram_beta_encode_init,
// Indices 7-9: no encoder
NULL, NULL, NULL,
// Indices 10-39: unused
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
// Indices 40-44: the varint constructor serves both 41 and 42, the
// const constructor both 43 and 44
NULL, cram_varint_encode_init, cram_varint_encode_init, cram_const_encode_init, cram_const_encode_init,
// Indices 45-49: unused
NULL, NULL, NULL, NULL, NULL,
// Indices 50-51
NULL, cram_xpack_encode_init,
// Index 52
cram_xrle_encode_init,
// Index 53
cram_xdelta_encode_init,
};
/*
 * Creates an encoder for 'codec' by dispatching to the matching
 * constructor in encode_init[].
 *
 * If statistics are supplied but contain no values, no codec is created.
 * For byte-oriented data (E_BYTE, E_BYTE_ARRAY, E_BYTE_ARRAY_BLOCK) the
 * requested encoding is adjusted first: either varint encoding becomes
 * E_EXTERNAL and E_CONST_INT becomes E_CONST_BYTE.
 *
 * On success the new codec has its output block cleared and records 'vv'.
 *
 * Returns the new codec on success;
 *         NULL if 'st' has no values or the constructor failed (the
 *         latter is logged).
 * Aborts the process if no encoder exists for the codec number.  A codec
 * number outside the table is treated the same way rather than being
 * used to index beyond the end of encode_init[].
 */
cram_codec *cram_encoder_init(enum cram_encoding codec,
                              cram_stats *st,
                              enum cram_external_type option,
                              void *dat,
                              int version, varint_vec *vv) {
    const size_t n_codecs = sizeof(encode_init) / sizeof(encode_init[0]);
    cram_codec *r;

    if (st && !st->nvals)
        return NULL;

    // Byte data cannot use the integer-oriented encodings; substitute
    // the byte-oriented equivalents.
    if (option == E_BYTE || option == E_BYTE_ARRAY ||
        option == E_BYTE_ARRAY_BLOCK) {
        if (codec == E_VARINT_SIGNED || codec == E_VARINT_UNSIGNED)
            codec = E_EXTERNAL;
        else if (codec == E_CONST_INT)
            codec = E_CONST_BYTE;
    }

    // The size_t conversion also rejects negative values, which wrap to
    // very large numbers.
    if ((size_t)codec >= n_codecs || !encode_init[codec]) {
        hts_log_error("Unimplemented codec of type %s", cram_encoding2str(codec));
        abort();
    }

    r = encode_init[codec](st, codec, option, dat, version, vv);
    if (!r) {
        hts_log_error("Unable to initialise codec of type %s", cram_encoding2str(codec));
        return NULL;
    }

    r->out = NULL;
    r->vv = vv;
    return r;
}
/*
 * Reports which block id(s) a codec refers to.
 *
 * The return value is the primary id:
 *   - the content id for EXTERNAL, VARINT_* and BYTE_ARRAY_STOP codecs;
 *   - -1 for GOLOMB, BETA, SUBEXP, GOLOMB_RICE, GAMMA, for HUFFMAN with
 *     more than one code, and for unrecognised codecs (which are logged);
 *   - -2 for NULL, CONST_* and single-code HUFFMAN codecs;
 *   - for BYTE_ARRAY_LEN, the id reported by its length sub-codec.
 *
 * If 'id2' is non-NULL it receives the secondary id: the value
 * sub-codec's id for BYTE_ARRAY_LEN, otherwise -2.
 */
int cram_codec_to_id(cram_codec *c, int *id2) {
    int primary;
    int secondary = -2;

    switch (c->codec) {
    case E_NULL:
    case E_CONST_INT:
    case E_CONST_BYTE:
        primary = -2;
        break;

    case E_HUFFMAN:
        if (c->u.huffman.ncodes == 1)
            primary = -2;
        else
            primary = -1;
        break;

    case E_GOLOMB:
    case E_BETA:
    case E_SUBEXP:
    case E_GOLOMB_RICE:
    case E_GAMMA:
        primary = -1;
        break;

    case E_EXTERNAL:
    case E_VARINT_UNSIGNED:
    case E_VARINT_SIGNED:
        primary = c->u.external.content_id;
        break;

    case E_BYTE_ARRAY_STOP:
        primary = c->u.byte_array_stop.content_id;
        break;

    case E_BYTE_ARRAY_LEN:
        // Two sub-codecs, each asked for its own primary id.
        primary   = cram_codec_to_id(c->u.byte_array_len.len_codec, NULL);
        secondary = cram_codec_to_id(c->u.byte_array_len.val_codec, NULL);
        break;

    default:
        hts_log_error("Unknown codec type %d", c->codec);
        primary = -1;
    }

    if (id2 != NULL)
        *id2 = secondary;

    return primary;
}
/*
 * Converts a codec that was set up for decoding into one usable for
 * encoding, modifying '*c' in place.
 *
 * For each supported codec type the free/store/encode function pointers
 * are replaced with the encoder equivalents; the encode function is
 * chosen by looking at which decode function is currently installed.
 * Codecs containing sub-codecs (XPACK, BYTE_ARRAY_LEN) convert those
 * recursively.  'fd' is only passed on to the recursive calls.
 *
 * Returns 0 on success;
 *         -1 on allocation failure, if the installed decode function is
 *            not one of the recognised ones, if a sub-codec conversion
 *            failed, or if the codec type is not handled here.
 */
int cram_codec_decoder2encoder(cram_fd *fd, cram_codec *c) {
int j;
switch (c->codec) {
case E_CONST_INT:
case E_CONST_BYTE:
// Only the store function is replaced for constant codecs.
c->store = cram_const_encode_store;
break;
case E_EXTERNAL:
c->free = cram_external_encode_free;
c->store = cram_external_encode_store;
if (c->decode == cram_external_decode_int)
c->encode = cram_external_encode_int;
else if (c->decode == cram_external_decode_long)
c->encode = cram_external_encode_long;
else if (c->decode == cram_external_decode_char)
c->encode = cram_external_encode_char;
// The block decoder also maps to the char encoder.
else if (c->decode == cram_external_decode_block)
c->encode = cram_external_encode_char;
else
return -1;
break;
case E_VARINT_SIGNED:
case E_VARINT_UNSIGNED:
c->free = cram_varint_encode_free;
c->store = cram_varint_encode_store;
if (c->decode == cram_varint_decode_int)
c->encode = cram_varint_encode_int;
else if (c->decode == cram_varint_decode_sint)
c->encode = cram_varint_encode_sint;
else if (c->decode == cram_varint_decode_long)
c->encode = cram_varint_encode_long;
else if (c->decode == cram_varint_decode_slong)
c->encode = cram_varint_encode_slong;
else
return -1;
break;
case E_HUFFMAN: {
// The encoder view is assembled in a temporary codec and then copied
// over *c as a whole.  The code table pointer is carried across, not
// duplicated.
// NOTE(review): t comes from malloc and only some members are assigned
// below, so the struct copy also transfers members whose contents are
// indeterminate - confirm none of them are read afterwards.
cram_codec *t = malloc(sizeof(*t));
if (!t) return -1;
t->vv = c->vv;
t->codec = E_HUFFMAN;
t->free = cram_huffman_encode_free;
t->store = cram_huffman_encode_store;
t->u.e_huffman.codes = c->u.huffman.codes;
t->u.e_huffman.nvals = c->u.huffman.ncodes;
t->u.e_huffman.option = c->u.huffman.option;
// Build the symbol -> code index lookup; symbols are offset by one so
// that -1 lands in slot 0.  Symbols outside [-1, MAX_HUFF) get no entry.
for (j = 0; j < t->u.e_huffman.nvals; j++) {
int32_t sym = t->u.e_huffman.codes[j].symbol;
if (sym >= -1 && sym < MAX_HUFF)
t->u.e_huffman.val2code[sym+1] = j;
}
if (c->decode == cram_huffman_decode_char0)
t->encode = cram_huffman_encode_char0;
else if (c->decode == cram_huffman_decode_char)
t->encode = cram_huffman_encode_char;
else if (c->decode == cram_huffman_decode_int0)
t->encode = cram_huffman_encode_int0;
else if (c->decode == cram_huffman_decode_int)
t->encode = cram_huffman_encode_int;
else if (c->decode == cram_huffman_decode_long0)
t->encode = cram_huffman_encode_long0;
else if (c->decode == cram_huffman_decode_long)
t->encode = cram_huffman_encode_long;
else {
free(t);
return -1;
}
*c = *t;
free(t);
break;
}
case E_BETA:
c->free = cram_beta_encode_free;
c->store = cram_beta_encode_store;
if (c->decode == cram_beta_decode_int)
c->encode = cram_beta_encode_int;
else if (c->decode == cram_beta_decode_long)
c->encode = cram_beta_encode_long;
else if (c->decode == cram_beta_decode_char)
c->encode = cram_beta_encode_char;
else
return -1;
break;
case E_XPACK: {
// Work on a stack copy so *c is only overwritten once everything,
// including the sub-codec conversion, has succeeded.
cram_codec t = *c;
t.free = cram_xpack_encode_free;
t.store = cram_xpack_encode_store;
if (t.decode == cram_xpack_decode_long)
t.encode = cram_xpack_encode_long;
else if (t.decode == cram_xpack_decode_int)
t.encode = cram_xpack_encode_int;
else if (t.decode == cram_xpack_decode_char)
t.encode = cram_xpack_encode_char;
else
return -1;
t.u.e_xpack.sub_codec = t.u.xpack.sub_codec;
if (cram_codec_decoder2encoder(fd, t.u.e_xpack.sub_codec) == -1)
return -1;
*c = t;
break;
}
case E_BYTE_ARRAY_LEN: {
// As for HUFFMAN, a temporary codec is filled in and then copied over
// *c; members of t not assigned here are indeterminate when copied.
cram_codec *t = malloc(sizeof(*t));
if (!t) return -1;
t->vv = c->vv;
t->codec = E_BYTE_ARRAY_LEN;
t->free = cram_byte_array_len_encode_free;
t->store = cram_byte_array_len_encode_store;
t->encode = cram_byte_array_len_encode;
t->u.e_byte_array_len.len_codec = c->u.byte_array_len.len_codec;
t->u.e_byte_array_len.val_codec = c->u.byte_array_len.val_codec;
// Both sub-codecs are converted in place; the second is skipped if the
// first fails.
// NOTE(review): on failure t->free(t) is cram_byte_array_len_encode_free,
// which frees both sub-codecs as well as t, while *c still holds the same
// two sub-codec pointers - confirm callers do not use or free them again.
if (cram_codec_decoder2encoder(fd, t->u.e_byte_array_len.len_codec) == -1 ||
cram_codec_decoder2encoder(fd, t->u.e_byte_array_len.val_codec) == -1) {
t->free(t);
return -1;
}
*c = *t;
free(t);
break;
}
case E_BYTE_ARRAY_STOP:
c->free = cram_byte_array_stop_encode_free;
c->store = cram_byte_array_stop_encode_store;
c->encode = cram_byte_array_stop_encode;
break;
default:
// Every other codec type has no conversion here.
return -1;
}
return 0;
}
/*
 * Appends a textual description of codec 'c' to 'ks'.
 *
 * When 'c' is NULL or has no describe function, "?" is appended and the
 * result of that ksprintf call is returned; otherwise the codec's own
 * describe function is called and its result returned.
 */
int cram_codec_describe(cram_codec *c, kstring_t *ks) {
    if (c == NULL || c->describe == NULL)
        return ksprintf(ks, "?");

    return c->describe(c, ks);
}