#include <float.h>
#include <string.h>
#include "alloc_class.h"
#include "heap_layout.h"
#include "util.h"
#include "out.h"
#include "bucket.h"
#include "cuckoo.h"
#define RUN_CLASS_KEY_PACK(map_idx_s, header_type_s, size_idx_s)\
((uint64_t)(map_idx_s) << 32 |\
(uint64_t)(header_type_s) << 16 |\
(uint64_t)(size_idx_s))
#define ACLASS_RESERVED ((void *)0xFFFFFFFFULL)
#define MAX_RUN_SIZE (CHUNKSIZE * 10)
#define MAX_RUN_WASTED_BYTES 1024
#define MAX_ALLOC_CATEGORIES 9
#define FIRST_GENERATED_CLASS_SIZE 128
#define ALLOC_BLOCK_SIZE_GEN 64
static struct {
size_t size;
float step;
} categories[MAX_ALLOC_CATEGORIES] = {
{FIRST_GENERATED_CLASS_SIZE, 0.05f},
{1024, 0.05f},
{2048, 0.05f},
{4096, 0.05f},
{8192, 0.05f},
{16384, 0.05f},
{32768, 0.05f},
{131072, 0.05f},
{393216, 0.05f},
};
#define RUN_UNIT_MAX_ALLOC 8U
#define ALLOC_BLOCK_SIZE 16
#define SIZE_TO_CLASS_MAP_INDEX(_s, _g) (1 + (((_s) - 1) / (_g)))
#define RUN_SIZE_BYTES(size_idx)\
(RUNSIZE + ((size_idx - 1) * CHUNKSIZE))
#define RUN_MIN_NALLOCS 500
#define RUN_SIZE_IDX_CAP (16)
struct alloc_class_collection {
size_t granularity;
struct alloc_class *aclasses[MAX_ALLOCATION_CLASSES];
size_t last_run_max_size;
uint8_t *class_map_by_alloc_size;
struct cuckoo *class_map_by_unit_size;
int fail_on_missing_class;
int autogenerate_on_missing_class;
};
int
alloc_class_find_first_free_slot(struct alloc_class_collection *ac,
uint8_t *slot)
{
for (int n = 0; n < MAX_ALLOCATION_CLASSES; ++n) {
if (util_bool_compare_and_swap64(&ac->aclasses[n],
NULL, ACLASS_RESERVED)) {
*slot = (uint8_t)n;
return 0;
}
}
return -1;
}
int
alloc_class_reserve(struct alloc_class_collection *ac, uint8_t id)
{
return util_bool_compare_and_swap64(&ac->aclasses[id],
NULL, ACLASS_RESERVED) ? 0 : -1;
}
static void
alloc_class_reservation_clear(struct alloc_class_collection *ac, int id)
{
int ret = util_bool_compare_and_swap64(&ac->aclasses[id],
ACLASS_RESERVED, NULL);
ASSERT(ret);
}
void
alloc_class_generate_run_proto(struct alloc_class_run_proto *dest,
size_t unit_size, uint32_t size_idx)
{
ASSERTne(size_idx, 0);
dest->size_idx = size_idx;
dest->bitmap_nallocs = (uint32_t)
(RUN_SIZE_BYTES(dest->size_idx) / unit_size);
while (dest->bitmap_nallocs > RUN_BITMAP_SIZE) {
LOG(3, "tried to create allocation class (%lu) with number "
"of units (%u) exceeding the bitmap size (%u)",
unit_size, dest->bitmap_nallocs, RUN_BITMAP_SIZE);
if (dest->size_idx > 1) {
dest->size_idx -= 1;
dest->bitmap_nallocs = (uint32_t)
(RUN_SIZE_BYTES(dest->size_idx) / unit_size);
LOG(3, "allocation class (%lu) was constructed with "
"fewer (%u) than requested chunks (%u)",
unit_size, dest->size_idx, dest->size_idx + 1);
} else {
LOG(3, "allocation class (%lu) was constructed with "
"fewer units (%u) than optimal (%u), "
"this might lead to "
"inefficient memory utilization!",
unit_size,
RUN_BITMAP_SIZE, dest->bitmap_nallocs);
dest->bitmap_nallocs = RUN_BITMAP_SIZE;
}
}
ASSERT(dest->bitmap_nallocs <= RUN_BITMAP_SIZE);
unsigned unused_bits = RUN_BITMAP_SIZE - dest->bitmap_nallocs;
unsigned unused_values = unused_bits / BITS_PER_VALUE;
ASSERT(MAX_BITMAP_VALUES >= unused_values);
dest->bitmap_nval = MAX_BITMAP_VALUES - unused_values;
ASSERT(unused_bits >= unused_values * BITS_PER_VALUE);
unused_bits -= unused_values * BITS_PER_VALUE;
dest->bitmap_lastval = unused_bits ?
(((1ULL << unused_bits) - 1ULL) <<
(BITS_PER_VALUE - unused_bits)) : 0;
}
struct alloc_class *
alloc_class_register(struct alloc_class_collection *ac,
struct alloc_class *c)
{
struct alloc_class *nc = Malloc(sizeof(*nc));
if (nc == NULL)
goto error_class_alloc;
*nc = *c;
if (c->type == CLASS_RUN) {
size_t map_idx = SIZE_TO_CLASS_MAP_INDEX(nc->unit_size,
ac->granularity);
ASSERT(map_idx <= UINT32_MAX);
uint32_t map_idx_s = (uint32_t)map_idx;
ASSERT(nc->run.size_idx <= UINT16_MAX);
uint16_t size_idx_s = (uint16_t)nc->run.size_idx;
uint16_t header_type_s = (uint16_t)nc->header_type;
uint64_t k = RUN_CLASS_KEY_PACK(map_idx_s,
header_type_s, size_idx_s);
if (cuckoo_insert(ac->class_map_by_unit_size, k, nc) != 0) {
ERR("unable to register allocation class");
goto error_map_insert;
}
}
ac->aclasses[nc->id] = nc;
return nc;
error_map_insert:
Free(nc);
error_class_alloc:
alloc_class_reservation_clear(ac, c->id);
return NULL;
}
static struct alloc_class *
alloc_class_from_params(struct alloc_class_collection *ac,
enum alloc_class_type type,
size_t unit_size,
unsigned unit_max, unsigned unit_max_alloc,
uint32_t size_idx)
{
struct alloc_class c;
c.unit_size = unit_size;
c.header_type = HEADER_COMPACT;
c.type = type;
switch (type) {
case CLASS_HUGE:
c.id = DEFAULT_ALLOC_CLASS_ID;
break;
case CLASS_RUN:
alloc_class_generate_run_proto(&c.run, unit_size,
size_idx);
uint8_t slot;
if (alloc_class_find_first_free_slot(ac, &slot) != 0) {
return NULL;
}
c.id = slot;
break;
default:
ASSERT(0);
}
return alloc_class_register(ac, &c);
}
void
alloc_class_delete(struct alloc_class_collection *ac,
struct alloc_class *c)
{
ac->aclasses[c->id] = NULL;
Free(c);
}
static struct alloc_class *
alloc_class_find_or_create(struct alloc_class_collection *ac, size_t n)
{
COMPILE_ERROR_ON(MAX_ALLOCATION_CLASSES > UINT8_MAX);
uint64_t required_size_bytes = n * RUN_MIN_NALLOCS;
uint32_t required_size_idx = 1;
if (required_size_bytes > RUNSIZE) {
required_size_bytes -= RUNSIZE;
required_size_idx +=
CALC_SIZE_IDX(CHUNKSIZE, required_size_bytes);
if (required_size_idx > RUN_SIZE_IDX_CAP)
required_size_idx = RUN_SIZE_IDX_CAP;
}
for (int i = MAX_ALLOCATION_CLASSES - 1; i >= 0; --i) {
struct alloc_class *c = ac->aclasses[i];
if (c == NULL || c->type == CLASS_HUGE ||
c->run.size_idx < required_size_idx)
continue;
if (n % c->unit_size == 0 &&
n / c->unit_size <= RUN_UNIT_MAX_ALLOC)
return c;
}
size_t runsize_bytes = RUN_SIZE_BYTES(required_size_idx);
while ((runsize_bytes % n) > MAX_RUN_WASTED_BYTES) {
n += ALLOC_BLOCK_SIZE_GEN;
}
for (int i = 1; i < MAX_ALLOCATION_CLASSES; ++i) {
struct alloc_class *c = ac->aclasses[i];
if (c == NULL || c->type == CLASS_HUGE)
continue;
if (n / c->unit_size <= RUN_UNIT_MAX_ALLOC &&
n % c->unit_size == 0)
return c;
if (c->unit_size == n)
return c;
}
return alloc_class_from_params(ac, CLASS_RUN, n,
RUN_UNIT_MAX, RUN_UNIT_MAX_ALLOC, required_size_idx);
}
static struct alloc_class *
alloc_class_find_min_frag(struct alloc_class_collection *ac, size_t n)
{
struct alloc_class *best_c = NULL;
float best_frag = FLT_MAX;
ASSERTne(n, 0);
for (int i = MAX_ALLOCATION_CLASSES - 1; i >= 0; --i) {
struct alloc_class *c = ac->aclasses[i];
if (c == NULL || c->header_type == HEADER_NONE)
continue;
size_t real_size = n + header_type_to_size[c->header_type];
size_t units = CALC_SIZE_IDX(c->unit_size, real_size);
if (units > RUN_UNIT_MAX_ALLOC)
continue;
float frag = (float)(c->unit_size * units) / (float)real_size;
if (frag == 1.f)
return c;
ASSERT(frag >= 1.f);
if (frag < best_frag || best_c == NULL) {
best_c = c;
best_frag = frag;
}
}
ASSERTne(best_c, NULL);
return best_c;
}
struct alloc_class_collection *
alloc_class_collection_new()
{
struct alloc_class_collection *ac = Zalloc(sizeof(*ac));
if (ac == NULL)
return NULL;
memset(ac->aclasses, 0, sizeof(ac->aclasses));
ac->granularity = ALLOC_BLOCK_SIZE;
ac->last_run_max_size = MAX_RUN_SIZE;
ac->fail_on_missing_class = 0;
ac->autogenerate_on_missing_class = 1;
size_t maps_size = (MAX_RUN_SIZE / ac->granularity) + 1;
if ((ac->class_map_by_alloc_size = Malloc(maps_size)) == NULL)
goto error;
if ((ac->class_map_by_unit_size = cuckoo_new()) == NULL)
goto error;
memset(ac->class_map_by_alloc_size, 0xFF, maps_size);
if (alloc_class_from_params(ac, CLASS_HUGE, CHUNKSIZE, 0, 0, 1) == NULL)
goto error;
struct alloc_class *predefined_class =
alloc_class_from_params(ac, CLASS_RUN, MIN_RUN_SIZE,
RUN_UNIT_MAX, RUN_UNIT_MAX_ALLOC, 1);
if (predefined_class == NULL)
goto error;
for (size_t i = 0; i < FIRST_GENERATED_CLASS_SIZE / ac->granularity;
++i) {
ac->class_map_by_alloc_size[i] = predefined_class->id;
}
size_t granularity_mask = ALLOC_BLOCK_SIZE_GEN - 1;
for (int c = 1; c < MAX_ALLOC_CATEGORIES; ++c) {
size_t n = categories[c - 1].size + ALLOC_BLOCK_SIZE_GEN;
do {
if (alloc_class_find_or_create(ac, n) == NULL)
goto error;
float stepf = (float)n * categories[c].step;
size_t stepi = (size_t)stepf;
stepi = stepf == stepi ? stepi : stepi + 1;
n += (stepi + (granularity_mask)) & ~granularity_mask;
} while (n <= categories[c].size);
}
uint8_t largest_aclass_slot;
for (largest_aclass_slot = MAX_ALLOCATION_CLASSES - 1;
largest_aclass_slot > 0 &&
ac->aclasses[largest_aclass_slot] == NULL;
--largest_aclass_slot) {
}
struct alloc_class *c = ac->aclasses[largest_aclass_slot];
size_t real_unit_max = c->run.bitmap_nallocs < RUN_UNIT_MAX_ALLOC ?
c->run.bitmap_nallocs : RUN_UNIT_MAX_ALLOC;
size_t theoretical_run_max_size = c->unit_size * real_unit_max;
ac->last_run_max_size = MAX_RUN_SIZE > theoretical_run_max_size ?
theoretical_run_max_size : MAX_RUN_SIZE;
#ifdef DEBUG
for (size_t i = 0; i < MAX_ALLOCATION_CLASSES; ++i) {
struct alloc_class *c = ac->aclasses[i];
if (c != NULL && c->type == CLASS_RUN) {
ASSERTeq(i, c->id);
ASSERTeq(alloc_class_by_run(ac, c->unit_size,
c->header_type, c->run.size_idx), c);
}
}
#endif
return ac;
error:
alloc_class_collection_delete(ac);
return NULL;
}
void
alloc_class_collection_delete(struct alloc_class_collection *ac)
{
for (size_t i = 0; i < MAX_ALLOCATION_CLASSES; ++i) {
struct alloc_class *c = ac->aclasses[i];
if (c != NULL) {
alloc_class_delete(ac, c);
}
}
cuckoo_delete(ac->class_map_by_unit_size);
Free(ac->class_map_by_alloc_size);
Free(ac);
}
static struct alloc_class *
alloc_class_assign_by_size(struct alloc_class_collection *ac,
size_t size)
{
size_t class_map_index = SIZE_TO_CLASS_MAP_INDEX(size,
ac->granularity);
struct alloc_class *c = alloc_class_find_min_frag(ac,
class_map_index * ac->granularity);
ASSERTne(c, NULL);
util_bool_compare_and_swap64(
&ac->class_map_by_alloc_size[class_map_index],
MAX_ALLOCATION_CLASSES, c->id);
return c;
}
struct alloc_class *
alloc_class_by_alloc_size(struct alloc_class_collection *ac, size_t size)
{
if (size < ac->last_run_max_size) {
uint8_t class_id = ac->class_map_by_alloc_size[
SIZE_TO_CLASS_MAP_INDEX(size, ac->granularity)];
if (class_id == MAX_ALLOCATION_CLASSES) {
if (ac->fail_on_missing_class)
return NULL;
else if (ac->autogenerate_on_missing_class)
return alloc_class_assign_by_size(ac, size);
else
return ac->aclasses[DEFAULT_ALLOC_CLASS_ID];
}
return ac->aclasses[class_id];
} else {
return ac->aclasses[DEFAULT_ALLOC_CLASS_ID];
}
}
struct alloc_class *
alloc_class_by_run(struct alloc_class_collection *ac,
size_t unit_size, enum header_type header_type, uint32_t size_idx)
{
size_t map_idx = SIZE_TO_CLASS_MAP_INDEX(unit_size, ac->granularity);
ASSERT(map_idx <= UINT32_MAX);
uint32_t map_idx_s = (uint32_t)map_idx;
ASSERT(size_idx <= UINT16_MAX);
uint16_t size_idx_s = (uint16_t)size_idx;
uint16_t header_type_s = (uint16_t)header_type;
return cuckoo_get(ac->class_map_by_unit_size,
RUN_CLASS_KEY_PACK(map_idx_s, header_type_s, size_idx_s));
}
struct alloc_class *
alloc_class_by_id(struct alloc_class_collection *ac, uint8_t id)
{
return ac->aclasses[id];
}
ssize_t
alloc_class_calc_size_idx(struct alloc_class *c, size_t size)
{
uint32_t size_idx = CALC_SIZE_IDX(c->unit_size,
size + header_type_to_size[c->header_type]);
if (c->type == CLASS_RUN) {
if (c->header_type == HEADER_NONE && size_idx != 1)
return -1;
else if (size_idx > RUN_UNIT_MAX)
return -1;
else if (size_idx > c->run.bitmap_nallocs)
return -1;
}
return size_idx;
}