#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <stdarg.h>
#include <stdalign.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#define CAML_NAME_SPACE
#define CAML_INTERNALS
#include "boxroot.h"
#include <caml/minor_gc.h>
#include <caml/major_gc.h>
#if defined(_POSIX_TIMERS) && defined(_POSIX_MONOTONIC_CLOCK)
#define POSIX_CLOCK
#include <time.h>
#endif
#include "ocaml_hooks.h"
#include "platform.h"
#if OCAML_MULTICORE
#include <caml/lf_skiplist.h>
#endif
/* Configuration sanity check: forcing remote deallocation only makes
   sense when multithread support is compiled in. */
static_assert(!BXR_FORCE_REMOTE || BXR_MULTITHREAD,
              "invalid configuration");

/* Pool classes: YOUNG pools may contain roots pointing into the minor
   heap, OLD pools only into the major heap, and UNTRACKED pools are
   empty (never scanned). */
enum {
  YOUNG = BXR_CLASS_YOUNG,
  OLD,
  UNTRACKED
};

/* A boxroot is a pointer to a single slot inside a pool. */
struct bxr_private {
  bxr_slot contents;
};

/* List of slots freed remotely (without the owner's domain lock),
   packed into a single word so it can be swapped with one CAS:
   a_next is the byte offset of the list head within its pool, and
   a_dealloc_count the number of delayed deallocations. */
typedef struct {
  halfptr_t a_next;
  halfptr_t a_dealloc_count;
} delayed_free_list;

static_assert(sizeof(delayed_free_list) == sizeof(void *),
              "Wrong _Atomic(delayed_free_list) size");

/* A pool of boxroot slots. free_list must stay at offset 0 (checked
   below) so a bxr_free_list pointer doubles as a pool pointer.
   delayed_fl gets its own cache line to limit false sharing with the
   owner's fast path. */
typedef struct pool {
  bxr_free_list free_list;
  struct pool *prev;
  struct pool *next;
  alignas(Cache_line_size) _Atomic(delayed_free_list) delayed_fl;
  mutex_t mutex;
  bxr_slot roots[]; /* flexible array filling the rest of BXR_POOL_SIZE */
} pool;

/* Number of slots per pool: whatever fits after the header. */
#define POOL_CAPACITY ((int)((BXR_POOL_SIZE - sizeof(pool)) / sizeof(bxr_slot)))

static_assert(BXR_POOL_SIZE / sizeof(bxr_slot) <= INT_MAX, "pool size too large");
static_assert(POOL_CAPACITY >= 1, "pool size too small");
static_assert(offsetof(pool, free_list) == 0, "incorrect free_list offset");

/* Sentinel free list used when no pool is current: next points at the
   sentinel itself, so the fast allocation path always fails over to
   bxr_create_slow. */
static bxr_free_list const empty_fl = { (bxr_slot_ref)&empty_fl, -1, -1, UNTRACKED };
static bxr_free_list * const empty_fl_ptr = (bxr_free_list *) &empty_fl;

/* Per-domain pool rings. */
typedef struct {
  pool *old;                  /* class OLD */
  pool *young;                /* class YOUNG */
  pool *current;              /* pool currently allocated from (singleton ring) */
  bxr_free_list *current_fl;  /* &current->free_list, or empty_fl_ptr */
  pool *free;                 /* empty pools cached for reuse */
  bool reordered_since_minor; /* old ring reordered at most once per minor GC */
} bxr_domain_state;
/* Reset a domain state to "no pools": every ring empty and the current
   free list pointing at the sentinel, so allocation goes through the
   slow path. */
static void init_pool_rings(bxr_domain_state *dom_st)
{
  /* Unnamed members of the compound literal are zero-initialized,
     i.e. the four ring pointers become NULL. */
  *dom_st = (bxr_domain_state){
    .current_fl = empty_fl_ptr,
    .reordered_since_minor = true,
  };
}
static void free_pool_rings(bxr_domain_state *);
#if OCAML_MULTICORE

/* Map from domain id to bxr_domain_state, kept in the runtime's
   lock-free skiplist so lookups are safe from any thread. The key is
   dom_id + 1 (presumably to avoid a reserved key 0 — see
   caml/lf_skiplist). */
static struct lf_skiplist domain_state_skiplist;

#define Cached_fl_ptr_init ((bxr_free_list **) &empty_fl_ptr)
/* Thread-local caches avoiding the skiplist lookup on hot paths. */
_Thread_local bxr_domain_state *bxr_cached_domain_state = NULL;
_Thread_local bxr_free_list **bxr_cached_fl_ptr = Cached_fl_ptr_init;

static void init_bxr_domain_state(void)
{
  caml_lf_skiplist_init(&domain_state_skiplist);
}

/* Return the state registered for [dom_id], or NULL if none yet;
   caches a successful lookup in the calling thread. */
static bxr_domain_state *get_bxr_domain_state(int dom_id)
{
  if (bxr_cached_domain_state != NULL) return bxr_cached_domain_state;
  DEBUGassert(dom_id >= 0);
  uintnat key = dom_id + 1;
  bxr_domain_state *value = NULL;
  if (caml_lf_skiplist_find(&domain_state_skiplist, key, (uintnat *) &value)) {
    bxr_cached_domain_state = value;
  }
  return value;
}

/* Publish [fl] as the free list used by the allocation fast path. */
static void set_current_fl(bxr_domain_state *local, bxr_free_list *fl)
{
  DEBUGassert(local != NULL);
  local->current_fl = fl;
}

/* Whether init_thread already ran in the calling thread. */
static bool thread_initialised(void)
{
  return bxr_cached_fl_ptr != Cached_fl_ptr_init;
}

/* Lazily create and register the domain state for [dom_id], then point
   the calling thread's cached free-list pointer at it. Returns false
   on allocation failure. */
static bool init_thread(int dom_id)
{
  DEBUGassert(dom_id >= 0);
  bxr_domain_state *local = get_bxr_domain_state(dom_id);
  if (local == NULL) {
    local = malloc(sizeof(bxr_domain_state));
    if (local == NULL) return false;
    init_pool_rings(local);
    caml_lf_skiplist_insert(&domain_state_skiplist, dom_id + 1, (uintnat) local);
  }
  bxr_cached_fl_ptr = &local->current_fl;
  return true;
}

/* Tear down every registered domain state (called from teardown, so no
   concurrent users are expected). */
static void free_domain_state()
{
  FOREACH_LF_SKIPLIST_ELEMENT(var, &domain_state_skiplist, {
      bxr_domain_state *dom_st = (bxr_domain_state *)var->data;
      free_pool_rings(dom_st);
      free(dom_st);
      caml_lf_skiplist_remove(&domain_state_skiplist, var->key);
    });
  caml_lf_skiplist_free_garbage(&domain_state_skiplist);
}
#else

/* Single-domain (OCaml 4) implementation: one global domain state and
   one global current free list, protected by the runtime lock. */
bxr_free_list *bxr_current_fl = empty_fl_ptr;
static bxr_domain_state bxr_state = { NULL, };

static void init_bxr_domain_state(void) {};

/* Only domain 0 exists on OCaml 4. */
static bxr_domain_state *get_bxr_domain_state(int dom_id)
{
  DEBUGassert(dom_id == 0);
  return &bxr_state;
}

static void set_current_fl(bxr_domain_state *, bxr_free_list *fl)
{
  bxr_current_fl = fl;
}

/* No per-thread setup is needed in the single-domain build. */
static bool thread_initialised(void) { return true; }
static bool init_thread(int) { return true; }

static void free_domain_state()
{
  free_pool_rings(&bxr_state);
  bxr_current_fl = empty_fl_ptr;
}

#endif
/* Pools of terminated domains, waiting to be adopted by a live domain;
   only the old and young rings are used, under orphan_mutex. */
static bxr_domain_state orphan = { NULL, };
static mutex_t orphan_mutex = BXR_MUTEX_INITIALIZER;
/* Global cumulative counters, most of them only maintained when
   BOXROOT_STATS (or BOXROOT_DEBUG) is enabled. */
static struct {
  /* collections observed */
  atomic_llong minor_collections;
  atomic_llong major_collections;
  /* creation / deletion / modification paths */
  atomic_llong total_create_young;
  atomic_llong total_create_old;
  atomic_llong total_create_slow;
  atomic_llong total_create_slow_time;
  atomic_llong total_delete_young;
  atomic_llong total_delete_old;
  atomic_llong total_delete_aux_local;
  atomic_llong total_delete_remote_domain;
  atomic_llong total_delete_remote_nolock;
  atomic_llong total_modify;
  atomic_llong total_modify_slow;
  /* scanning work and timing */
  atomic_llong total_scanning_work_minor;
  atomic_llong total_scanning_work_major;
  atomic_llong setup_time;
  atomic_llong time_counter_calls;
  atomic_llong total_minor_time;
  atomic_llong total_major_time;
  atomic_llong peak_minor_time;
  atomic_llong peak_major_time;
  /* pool lifecycle */
  atomic_llong total_alloced_pools;
  atomic_llong total_emptied_pools;
  atomic_llong total_freed_pools;
  atomic_llong live_pools;
  atomic_llong peak_pools;
  /* debug-only micro counters */
  atomic_llong ring_operations;
  atomic_llong young_hit_gen;
  atomic_llong young_hit_young;
  atomic_llong get_pool_header;
  atomic_llong is_pool_member;
  atomic_llong old_reordering;
  atomic_llong old_reordering_time;
} stats;
/* Header of the pool containing slot [s] (pools are aligned on
   BXR_POOL_SIZE). */
static inline pool * get_pool_header(bxr_slot_ref s)
{
  if (BOXROOT_DEBUG) STATS_INCR(get_pool_header);
  return (pool *)Bxr_get_pool_header(s);
}

/* A slot holds either an OCaml value or a free-list link into its own
   pool; a slot is free iff its contents point inside pool [p].
   Masking with ~(BXR_POOL_SIZE - 2) rather than ~(BXR_POOL_SIZE - 1)
   preserves the low bit, so OCaml immediates (low bit set) can never
   compare equal to the (aligned) pool address. */
static inline bool is_pool_member(bxr_slot v, pool *p)
{
  if (BOXROOT_DEBUG) STATS_INCR(is_pool_member);
  return (uintptr_t)p == ((uintptr_t)v.as_slot_ref & ~((uintptr_t)BXR_POOL_SIZE - 2));
}

/* End-of-free-list test: the sentinel is the pool address itself. */
static inline bool is_empty_free_list(bxr_slot_ref v, pool *p)
{
  return (v == (bxr_slot_ref)p);
}
/* Offsets inside a pool must fit in a halfptr_t. */
static_assert(BXR_POOL_LOG_SIZE <= sizeof(halfptr_t) * 8, "BXR_POOL_LOG_SIZE too large");

/* Pack (head slot, deallocation count) into one word; the slot is
   stored as a byte offset from the pool base. */
static delayed_free_list make_delayed_fl(pool *p,
                                         bxr_slot_ref next,
                                         int dealloc_count)
{
  return (delayed_free_list) { .a_next = (char *)next - (char *)p,
                               .a_dealloc_count = dealloc_count };
}

/* Inverse of make_delayed_fl. */
static void extract_delayed_fl(delayed_free_list val,
                               pool *p,
                               bxr_slot_ref *out_next,
                               int *out_dealloc_count)
{
  *out_next = (bxr_slot_ref)((char *)p + val.a_next);
  *out_dealloc_count = val.a_dealloc_count;
}

/* Empty delayed list: offset 0 decodes to the pool header itself,
   which is exactly the empty-free-list sentinel. */
static const delayed_free_list empty_delayed_fl =
  { .a_next = 0, .a_dealloc_count = 0 };
/* Make q the successor of p inside a doubly-linked pool ring, counting
   the operation in debug builds. */
static inline void ring_link(pool *p, pool *q)
{
  q->prev = p;
  p->next = q;
  if (BOXROOT_DEBUG) STATS_INCR(ring_operations);
}
/* Append [source] at the back of the ring [*target]. In debug builds
   [source] is checked to be a singleton ring distinct from [*target],
   and of the same class; the splicing code below would nevertheless
   merge two full rings. No-op when source is NULL. */
static inline void ring_push_back(pool *source, pool **target)
{
  if (source == NULL) return;
  DEBUGassert(source->prev == source && source->next == source);
  DEBUGassert(source != *target);
  if (*target == NULL) {
    *target = source;
  } else {
    DEBUGassert((*target)->free_list.classe == source->free_list.classe);
    /* Splice: target_last -> source ... source_last -> target head. */
    pool *target_last = (*target)->prev;
    pool *source_last = source->prev;
    ring_link(target_last, source);
    ring_link(source_last, *target);
  }
}
/* Detach and return the head of [*target] as a singleton ring;
   [*target] advances to the next element, or becomes NULL when the
   ring had a single element. */
static pool * ring_pop(pool **target)
{
  pool *front = *target;
  DEBUGassert(front != NULL);
  if (front->next == front) {
    *target = NULL;
  } else {
    *target = front->next;
    ring_link(front->prev, front->next);
  }
  /* Turn front into a singleton ring. */
  ring_link(front, front);
  return front;
}
/* Remove [p] from the ring headed by [*source]. If p is the head, pop
   through source so the caller's head pointer is updated; otherwise
   pop through a local alias of p (the head is unaffected). */
static pool * ring_remove(pool **source, pool *p)
{
  if (p == *source) return ring_pop(source);
  return ring_pop(&p);
}
static long long time_counter(void);

/* The free-list terminator of a pool is the pool address itself. */
static inline bxr_slot_ref empty_free_list(pool *p) { return (bxr_slot_ref)p; }

/* A pool is full when its (non-delayed) free list is empty. */
static inline bool is_full_pool(pool *p)
{
  return is_empty_free_list(p->free_list.next, p);
}
/* Allocate and initialize a fresh pool: a singleton ring of class
   UNTRACKED whose slots are all threaded onto the free list (the last
   slot holds the end-of-list sentinel). Returns NULL on allocation
   failure. */
static pool * get_empty_pool()
{
  pool *p = bxr_alloc_uninitialised_pool(BXR_POOL_SIZE);
  if (p == NULL) return NULL;
  if (BOXROOT_STATS) {
    /* Racy peak update; acceptable for statistics. */
    long long live_pools = 1 + incr(&stats.live_pools);
    if (live_pools > stats.peak_pools) stats.peak_pools = live_pools;
  }
  STATS_INCR(total_alloced_pools);
  ring_link(p, p);
  p->free_list.next = p->roots;
  p->free_list.alloc_count = 0;
  p->free_list.domain_id = -1;
  p->free_list.classe = UNTRACKED;
  store_relaxed(&p->delayed_fl, empty_delayed_fl);
  bxr_initialize_mutex(&p->mutex);
  /* Thread every slot onto the free list, ending at the sentinel. */
  p->roots[POOL_CAPACITY - 1].as_slot_ref = empty_free_list(p);
  for (bxr_slot_ref s = p->roots + POOL_CAPACITY - 2; s >= p->roots; --s) {
    s->as_slot_ref = s + 1;
  }
  return p;
}
/* Drain [*ring], returning every pool's memory to the platform layer;
   leaves [*ring] equal to NULL. */
static void free_pool_ring(pool **ring)
{
  while (*ring != NULL) {
    bxr_free_pool(ring_pop(ring));
    STATS_INCR(total_freed_pools);
  }
}
/* Release every pool owned by one domain state. */
static void free_pool_rings(bxr_domain_state *bxr_dom_st)
{
  pool **rings[] = { &bxr_dom_st->old, &bxr_dom_st->young,
                     &bxr_dom_st->current, &bxr_dom_st->free };
  for (size_t i = 0; i < sizeof(rings) / sizeof(rings[0]); i++)
    free_pool_ring(rings[i]);
}
/* Allocation count once pending remote deallocations are deducted.
   The relaxed load makes this an approximation while remote threads
   are racing; exact when the delayed list is quiescent. */
static int anticipated_alloc_count(pool *p)
{
  delayed_free_list dfl = load_relaxed(&p->delayed_fl);
  return p->free_list.alloc_count - dfl.a_dealloc_count;
}

/* Heuristic: the pool has enough free slots to be worth allocating
   from (at most BXR_DEALLOC_THRESHOLD bytes worth of live slots). */
static inline bool is_not_too_full(pool *p)
{
  return anticipated_alloc_count(p) <= (int)(BXR_DEALLOC_THRESHOLD / sizeof(bxr_slot));
}
/* Install the singleton ring [p] (may be NULL) as the current
   allocation pool of [dom_id], classifying it YOUNG and publishing its
   free list to the fast path. The alloc_count++ reserves a phantom
   slot marking the pool's "current" status; it is undone by
   move_current_to_young. */
static void set_current_pool(int dom_id, pool *p)
{
  bxr_domain_state *local = get_bxr_domain_state(dom_id);
  DEBUGassert(local->current == NULL);
  if (p == NULL) return;
  DEBUGassert(p->next == p);
  p->free_list.domain_id = dom_id;
  local->current = p;
  p->free_list.classe = YOUNG;
  p->free_list.alloc_count++;
  set_current_fl(local, &p->free_list);
}
static void reclassify_pool(pool **source, int dom_id, int cl);
static int reclassify_ring(pool **ring, int dom_id, int cl);

/* Retire the current pool into the young ring, removing the phantom
   allocation added by set_current_pool and restoring the sentinel free
   list for the fast path. No-op when there is no current pool. */
static void move_current_to_young(int dom_id)
{
  bxr_domain_state *local = get_bxr_domain_state(dom_id);
  if (local->current != NULL) {
    pool *p = ring_pop(&local->current);
    p->free_list.alloc_count--;
    reclassify_pool(&p, dom_id, YOUNG);
    set_current_fl(local, empty_fl_ptr);
  }
}
/* After a local deallocation in [p]: if the pool became empty move it
   to the free ring (UNTRACKED), otherwise if it now has enough free
   slots re-insert it so allocation finds it first. Does nothing for
   the current pool or for pools that are still too full. */
static void try_demote_pool(int dom_id, pool *p)
{
  DEBUGassert(p->free_list.classe != UNTRACKED);
  bxr_domain_state *local = get_bxr_domain_state(dom_id);
  if (p == local->current || !is_not_too_full(p)) return;
  int cl = (anticipated_alloc_count(p) == 0) ? UNTRACKED : p->free_list.classe;
  /* Pass the ring-head pointer when p heads a ring, so the head is
     updated by the removal; otherwise remove p in place. */
  pool **source = (p == local->old) ? &local->old :
                  (p == local->young) ? &local->young : &p;
  reclassify_pool(source, dom_id, cl);
}
static void validate_pool(pool *pl);

/* Adopt the remotely-deallocated slots of a full pool: atomically
   steal the delayed list (the acquire exchange pairs with the release
   CAS in free_slot_atomic) and install it as the regular free list.
   Since the pool was full, the new allocation count is POOL_CAPACITY
   minus the number of delayed deallocations. */
static void flush_delayed_fl(pool *p)
{
  DEBUGassert(is_full_pool(p));
  delayed_free_list fl = atomic_exchange_explicit(&p->delayed_fl, empty_delayed_fl,
                                                  memory_order_acquire);
  bxr_slot_ref delayed_next;
  int dealloc_count;
  extract_delayed_fl(fl, p, &delayed_next, &dealloc_count);
  p->free_list.next = delayed_next;
  p->free_list.alloc_count = POOL_CAPACITY - dealloc_count;
}
/* Remove and return the first pool of [*target] with at least one free
   slot (counting pending delayed deallocations), or NULL if every pool
   in the ring is full. */
static inline pool * pop_available(pool **target)
{
  pool *start = *target;
  if (start == NULL) return NULL;
  pool *p = start;
  do {
    if (anticipated_alloc_count(p) < POOL_CAPACITY)
      return ring_remove(target, p);
    p = p->next;
  } while (p != start);
  return NULL;
}
static void reorder_old_pools(int dom_id);

/* Find a pool with a free slot for [dom_id], in order of preference:
   a young pool, the head of the old ring (reordering the old ring at
   most once per minor collection so that not-too-full pools come
   first), a cached free pool, and finally a freshly allocated pool.
   Returns NULL (with errno set by the allocator) on failure. */
static pool * get_available_pool(int dom_id)
{
  bxr_domain_state *local = get_bxr_domain_state(dom_id);
  pool *p = pop_available(&local->young);
  if (p == NULL && local->old != NULL) {
    if (!is_not_too_full(local->old) && !local->reordered_since_minor) {
      local->reordered_since_minor = true;
      reorder_old_pools(dom_id);
    }
    if (is_not_too_full(local->old))
      p = ring_pop(&local->old);
  }
  if (p == NULL && local->free != NULL) p = ring_pop(&local->free);
  if (p == NULL) p = get_empty_pool();
  /* Fix: get_empty_pool() returns NULL when the backing allocation
     fails; previously p was dereferenced below without this check
     (the caller in bxr_create_slow already handles a NULL result). */
  if (p == NULL) return NULL;
  if (is_full_pool(p)) flush_delayed_fl(p);
  DEBUGassert(!is_full_pool(p));
  return p;
}
static void validate_all_pools(int dom_id);

/* Move the head pool of [*source] into the ring of class [cl] for
   [dom_id]. Empty pools go to the free ring. Pools with enough free
   slots become the head of the destination ring so that allocation
   finds them first; others stay at the back. */
static void reclassify_pool(pool **source, int dom_id, int cl)
{
  DEBUGassert(*source != NULL);
  bxr_domain_state *local = get_bxr_domain_state(dom_id);
  pool *p = ring_pop(source);
  p->free_list.domain_id = dom_id;
  pool **target = NULL;
  switch (cl) {
  case OLD: target = &local->old; break;
  case YOUNG: target = &local->young; break;
  case UNTRACKED:
    target = &local->free;
    DEBUGassert(anticipated_alloc_count(p) == 0);
    STATS_INCR(total_emptied_pools);
    STATS_DECR(live_pools);
    break;
  }
  p->free_list.classe = cl;
  ring_push_back(p, target);
  /* p was appended at the back; rotating the head to p serves it
     first when it still has room. */
  if (is_not_too_full(p)) *target = p;
}
/* Empty [*ring], re-inserting each pool into the ring of class [cl]
   for [dom_id] (which re-sorts available pools towards the head, see
   reclassify_pool). Pools are processed from back to front: stepping
   to ring->prev and popping detaches the last element each time.
   Returns the number of pools moved. */
static int reclassify_ring(pool **source, int dom_id, int cl)
{
  pool *ring = *source;
  int work = 0;
  *source = NULL;
  while (ring != NULL) {
    ring = ring->prev;
    reclassify_pool(&ring, dom_id, cl);
    work++;
  }
  return work;
}
/* Re-insert every old pool of the domain, which moves pools with free
   slots to the head of the old ring; the time spent is recorded in the
   statistics. */
static void reorder_old_pools(int dom_id)
{
  bxr_domain_state *local = get_bxr_domain_state(dom_id);
  long long t0 = time_counter();
  int moved = reclassify_ring(&local->old, dom_id, OLD);
  if (BOXROOT_STATS) {
    stats.old_reordering += moved;
    stats.old_reordering_time += time_counter() - t0;
  }
}
/* Library lifecycle status (values declared in boxroot.h); transitions
   happen under init_mutex, reads are relaxed. */
static atomic_int status = BOXROOT_NOT_SETUP;

int boxroot_status()
{
  return load_relaxed(&status);
}
static bool setup();

/* Slow path of boxroot_create, taken when the current free list is
   empty or is the sentinel. Performs lazy library/thread setup, then
   installs an available pool as current and retries the fast path.
   Returns NULL with errno set on failure. */
boxroot bxr_create_slow(value init)
{
  STATS_INCR(total_create_slow);
  long long time = time_counter();
  if (Caml_state_opt == NULL) { errno = EPERM; return NULL; }
  if (!setup()) return NULL;
#if !OCAML_MULTICORE
  if (!bxr_domain_lock_held()) { errno = EPERM; return NULL; }
  if (!bxr_check_thread_hooks()) {
    status = BOXROOT_INVALID;
    return NULL;
  }
#endif
  int dom_id = Domain_id;
  if (!thread_initialised()) {
    if (!init_thread(dom_id)) return NULL;
    /* Retry: the thread now sees its domain's free list. */
    return boxroot_create(init);
  }
  bxr_domain_state *local = get_bxr_domain_state(dom_id);
  DEBUGassert(local != NULL);
  if (local->current != NULL) {
    /* We only reach the slow path once the current pool is exhausted. */
    DEBUGassert(is_full_pool(local->current));
  }
  pool *available = get_available_pool(dom_id);
  if (available == NULL) return NULL;
  move_current_to_young(dom_id);
  set_current_pool(dom_id, available);
  boxroot res = boxroot_create(init);
  if (BOXROOT_STATS) stats.total_create_slow_time += time_counter() - time;
  return res;
}
/* Emit out-of-line definitions for the inline accessors of boxroot.h. */
extern inline value boxroot_get(boxroot root);
extern inline value const * boxroot_get_ref(boxroot root);

/* Debug hook of boxroot_create: classify the new root by the
   generation of its initial value, for the statistics. */
void bxr_create_debug(value init)
{
  DEBUGassert(Caml_state_opt != NULL);
  if (Is_block(init) && Is_young(init)) STATS_INCR(total_create_young);
  else STATS_INCR(total_create_old);
}
extern inline bool bxr_free_slot(bxr_free_list *fl, boxroot root);
void bxr_delete_debug(boxroot root)
{
DEBUGassert(root != NULL);
value v = boxroot_get(root);
if (Is_block(v) && Is_young(v)) STATS_INCR(total_delete_young);
else STATS_INCR(total_delete_old);
}
/* Push [root]'s slot onto the delayed free list of [p] with a CAS
   loop. The link is written into the slot before the CAS; the release
   order on success publishes it, pairing with the acquire exchange in
   flush_delayed_fl. */
static void free_slot_atomic(pool *p, boxroot root)
{
  bxr_slot_ref new_next = &root->contents;
  delayed_free_list old_fl = load_relaxed(&p->delayed_fl);
  bxr_slot_ref old_next;
  int old_dealloc_count;
  delayed_free_list new_fl;
  do {
    extract_delayed_fl(old_fl, p, &old_next, &old_dealloc_count);
    new_fl = make_delayed_fl(p, new_next, old_dealloc_count + 1);
    new_next->as_slot_ref = old_next;
  } while (!atomic_compare_exchange_weak_explicit(&p->delayed_fl,
                                                  &old_fl, new_fl,
                                                  memory_order_release,
                                                  memory_order_relaxed));
}
/* Slow path of boxroot_delete for the root's pool [fl]:
   - local deletion: the slot itself is presumably freed by the caller
     (bxr_free_slot) — here we only consider demoting the pool;
   - remote deletion while holding some domain lock (OCaml 5): push
     onto the delayed list with a lock-free CAS;
   - remote deletion without any domain lock: same, additionally
     serialized with GC scanning through the pool mutex. */
void bxr_delete_aux(boxroot root, bxr_free_list *fl, bool remote)
{
  pool *p = (pool *)fl;
  if (!remote) {
    STATS_INCR(total_delete_aux_local);
    try_demote_pool(p->free_list.domain_id, p);
  } else if (OCAML_MULTICORE && bxr_domain_lock_held()) {
    if (BOXROOT_DEBUG) STATS_INCR(total_delete_remote_domain);
    free_slot_atomic(p, root);
  } else {
    if (BOXROOT_DEBUG) STATS_INCR(total_delete_remote_nolock);
    bxr_mutex_lock(&p->mutex);
    free_slot_atomic(p, root);
    bxr_mutex_unlock(&p->mutex);
  }
}
extern inline void boxroot_delete(boxroot root);

/* Slow path of boxroot_modify: allocate a fresh root holding
   [new_value], publish it in [*root_ref], then delete the previous
   root. Requires the domain lock; returns false (errno = EPERM or the
   allocator's errno) on failure, leaving *root_ref untouched. */
bool bxr_modify_slow(boxroot *root_ref, value new_value)
{
  STATS_INCR(total_modify_slow);
  if (!bxr_domain_lock_held()) { errno = EPERM; return false; }
  boxroot fresh = boxroot_create(new_value);
  if (BXR_UNLIKELY(fresh == NULL)) return false;
  boxroot stale = *root_ref;
  *root_ref = fresh;
  boxroot_delete(stale);
  return true;
}
/* Debug hook of boxroot_modify: check the root is valid and count the
   modification. */
void bxr_modify_debug(boxroot *rootp)
{
  DEBUGassert(*rootp);
  STATS_INCR(total_modify);
}

extern inline bool boxroot_modify(boxroot *rootp, value new_value);
/* Debug check: walk the free list starting at [curr], asserting every
   link stays inside pool [pl] and that the list has exactly [length]
   elements. */
static void validate_fl(pool *pl, bxr_slot_ref curr, int length)
{
  int seen = 0;
  while (curr != empty_free_list(pl)) {
    assert(seen < POOL_CAPACITY);
    assert(curr >= pl->roots && curr < pl->roots + POOL_CAPACITY);
    curr = curr->as_slot_ref;
    seen++;
  }
  DEBUGassert(seen == length);
}
/* Exhaustive debug check of one pool: both free lists are well-formed
   and the number of occupied slots matches the anticipated allocation
   count. */
static void validate_pool(pool *pl)
{
  if (pl->free_list.next == NULL) {
    /* Not initialized yet. */
    assert(pl->free_list.classe == UNTRACKED);
    return;
  }
  validate_fl(pl, pl->free_list.next, POOL_CAPACITY - pl->free_list.alloc_count);
  bxr_slot_ref delayed_next;
  int dealloc_count;
  /* Plain read of the atomic field (seq_cst load via lvalue conversion). */
  extract_delayed_fl(pl->delayed_fl, pl, &delayed_next, &dealloc_count);
  validate_fl(pl, delayed_next, dealloc_count);
  int count = 0;
  for(int i = 0; i < POOL_CAPACITY; i++) {
    bxr_slot s = pl->roots[i];
    /* STATS_DECR compensates the counter bump inside is_pool_member. */
    STATS_DECR(is_pool_member); if (!is_pool_member(s, pl)) {
      value v = s.as_value;
      /* Non-young pools must not point into the minor heap. */
      if (pl->free_list.classe != YOUNG && Is_block(v)) assert(!Is_young(v));
      ++count;
    }
  }
  assert(count == anticipated_alloc_count(pl));
}
/* Debug check of a whole ring: each pool belongs to [dom_id], has
   class [cl], is internally consistent, and the prev/next links form a
   proper doubly-linked ring. */
static void validate_ring(pool **ring, int dom_id, int cl)
{
  pool *head = *ring;
  if (head == NULL) return;
  for (pool *q = head;;) {
    assert(q->free_list.domain_id == dom_id);
    assert(q->free_list.classe == cl);
    validate_pool(q);
    assert(q->next != NULL);
    assert(q->next->prev == q);
    assert(q->prev != NULL);
    assert(q->prev->next == q);
    q = q->next;
    if (q == head) break;
  }
}
/* Debug check of the current pool. It carries a phantom allocation
   (see set_current_pool), which is temporarily removed so that
   validate_pool's slot count matches. */
static void validate_current_pool(pool **current, int dom_id)
{
  if (*current != NULL) {
    /* The current pool is always a singleton ring. */
    assert((*current)->next == *current && (*current)->prev == *current);
    (*current)->free_list.alloc_count--;
  }
  validate_ring(current, dom_id, YOUNG);
  if (*current != NULL) (*current)->free_list.alloc_count++;
}
/* Debug check of every ring owned by [dom_id], each against its
   expected class. */
static void validate_all_pools(int dom_id)
{
  bxr_domain_state *local = get_bxr_domain_state(dom_id);
  if (local == NULL) return;
  validate_ring(&local->old, dom_id, OLD);
  validate_ring(&local->young, dom_id, YOUNG);
  validate_current_pool(&local->current, dom_id);
  validate_ring(&local->free, dom_id, UNTRACKED);
}
/* Hand over the pools of a terminating domain to the global orphan
   rings (under orphan_mutex), to be adopted later by a live domain;
   cached free pools are simply released. */
static void orphan_pools(int dom_id)
{
  bxr_domain_state *local = get_bxr_domain_state(dom_id);
  if (local == NULL) return;
  move_current_to_young(dom_id);
  bxr_mutex_lock(&orphan_mutex);
  ring_push_back(local->old, &orphan.old);
  ring_push_back(local->young, &orphan.young);
  bxr_mutex_unlock(&orphan_mutex);
  free_pool_ring(&local->free);
  /* Reset so the terminated domain's state is empty and reusable. */
  init_pool_rings(local);
}
/* Adopt any orphaned pools into the rings of domain [dom_id]
   (under orphan_mutex). */
static void adopt_orphaned_pools(int dom_id)
{
  bxr_mutex_lock(&orphan_mutex);
  reclassify_ring(&orphan.old, dom_id, OLD);
  reclassify_ring(&orphan.young, dom_id, YOUNG);
  bxr_mutex_unlock(&orphan_mutex);
}
/* Scan the occupied slots of [pl], applying the GC [action] to each
   root's value (restricted to minor-heap values when [only_young]).
   Stops as soon as every live allocation has been visited; returns the
   number of slots examined, reported as scanning work. Inlined into
   the two monomorphic instances below. */
static inline int scan_pool_aux(scanning_action action, int only_young, void *data, pool *pl)
{
  int allocs_to_find = anticipated_alloc_count(pl);
  int young_hit = 0;
  bxr_slot_ref current = pl->roots;
  minor_heap_info heap_info = get_minor_heap_info();
  while (allocs_to_find) {
    DEBUGassert(current < &pl->roots[POOL_CAPACITY]);
    bxr_slot s = *current;
    /* A slot pointing inside its own pool is a free-list link, not a root. */
    if (!is_pool_member(s, pl)) {
      --allocs_to_find;
      value v = s.as_value;
      if (BOXROOT_DEBUG && Bxr_is_young(heap_info, v)) ++young_hit;
      if (!only_young || Bxr_is_young(heap_info, v))
        /* Fix: "&current" had been corrupted into the mojibake
           "¤t" (U+00A4 + "t"), which does not compile. */
        CALL_GC_ACTION(action, data, v, &current->as_value);
    }
    ++current;
  }
  if (BOXROOT_STATS) {
    if (only_young) stats.young_hit_young += young_hit;
    else stats.young_hit_gen += young_hit;
  }
  return current - pl->roots;
}
/* Monomorphic instances of scan_pool_aux, so the only_young test is
   presumably specialized away by inlining; Noinline keeps the two
   copies separate. */
Noinline static int scan_pool_young(scanning_action action, void *data, pool *pl)
{
  return scan_pool_aux(action, true, data, pl);
}

Noinline static int scan_pool_gen(scanning_action action, void *data, pool *pl)
{
  return scan_pool_aux(action, false, data, pl);
}
/* Scan one pool under its mutex, which serializes with lock-less
   remote deletions (see bxr_delete_aux); returns the scanning work
   done. */
static int scan_pool(scanning_action action, int only_young, void *data, pool *pl)
{
  bxr_mutex_lock(&pl->mutex);
  int visited = only_young ? scan_pool_young(action, data, pl)
                           : scan_pool_gen(action, data, pl);
  bxr_mutex_unlock(&pl->mutex);
  return visited;
}
/* Scan every pool of a ring; returns the accumulated scanning work
   (0 for an empty ring). */
static int scan_ring(scanning_action action, int only_young,
                     void *data, pool **ring)
{
  pool *head = *ring;
  if (head == NULL) return 0;
  int work = 0;
  for (pool *q = head;;) {
    work += scan_pool(action, only_young, data, q);
    q = q->next;
    if (q == head) break;
  }
  return work;
}
/* Scan the domain's young pools, and its old pools too for non-minor
   scans. After a minor collection young pools no longer point into the
   minor heap, so they are promoted to the old ring; outside minor
   collections, take the opportunity to release cached free pools. */
static int scan_pools(scanning_action action, int only_young,
                      void *data, int dom_id)
{
  bxr_domain_state *local = get_bxr_domain_state(dom_id);
  int work = scan_ring(action, only_young, data, &local->young);
  if (!only_young) work += scan_ring(action, 0, data, &local->old);
  if (bxr_in_minor_collection()) {
    reclassify_ring(&local->young, dom_id, OLD);
    /* Allow one old-ring reordering before the next minor GC. */
    local->reordered_since_minor = false;
  } else {
    free_pool_ring(&local->free);
  }
  return work;
}
/* Entry point of a GC scan for one domain: retire the current pool,
   adopt orphans from dead domains, scan, and record the work done. */
static void scan_roots(scanning_action action, int only_young,
                       void *data, int dom_id)
{
  if (BOXROOT_DEBUG) validate_all_pools(dom_id);
  move_current_to_young(dom_id);
  adopt_orphaned_pools(dom_id);
  int work = scan_pools(action, only_young, data, dom_id);
  if (BOXROOT_STATS) {
    if (only_young) stats.total_scanning_work_minor += work;
    else stats.total_scanning_work_major += work;
  }
  if (BOXROOT_DEBUG) validate_all_pools(dom_id);
}
/* Monotonic time in nanoseconds, or 0 when stats are disabled or no
   POSIX monotonic clock is available. (NOTE(review): clock_gettime's
   return value is unchecked — on failure t would be read
   uninitialized; harmless for statistics but worth confirming.) */
static long long time_counter(void)
{
#if defined(POSIX_CLOCK) && BOXROOT_STATS
  STATS_INCR(time_counter_calls);
  struct timespec t;
  clock_gettime(CLOCK_MONOTONIC, &t);
  return (long long)t.tv_sec * (long long)1000000000 + (long long)t.tv_nsec;
#else
  return 0;
#endif
}
/* Total size of [count] pools in units of 2^(10*unit) bytes
   (unit = 1: KiB, unit = 2: MiB). */
static long long kib_of_pools(long long count, int unit)
{
  int log_per_pool = BXR_POOL_LOG_SIZE - unit * 10;
  if (log_per_pool >= 0) return count << log_per_pool;
  else return count >> -log_per_pool;
}
/* Mean of [total] over [units] samples, as a double (yields inf/NaN
   when units is 0, matching printf-friendly stats output). */
static double average(long long total, long long units)
{
  double numerator = (double)total;
  double denominator = (double)units;
  return numerator / denominator;
}
/* Print a human-readable statistics report on stdout. Counters are
   read without synchronization, so figures may be slightly
   inconsistent if boxroot is used concurrently. The %'lld "'" flag
   (digit grouping) is a POSIX printf extension. */
void boxroot_print_stats()
{
  printf("- minor collections: %'lld\n"
         " major collections (and others): %'lld\n",
         stats.minor_collections,
         stats.major_collections);
#if defined(POSIX_CLOCK)
  long long total_time_ns = time_counter() - stats.setup_time;
  double total_time = (double)total_time_ns / 1000;
  printf(" time since setup: %'.0fµs\n"
         " incl. time counter calls: %'lld\n",
         total_time, stats.time_counter_calls);
#endif
  /* Nothing further to report if boxroot never allocated a pool. */
  if (stats.total_alloced_pools == 0) return;
#if defined(POSIX_CLOCK)
  /* Scanning time breakdown (µs and share of total run time). */
  double total_minor = (double)stats.total_minor_time / 1000;
  double total_major = (double)stats.total_major_time / 1000;
  double pct_minor = average(stats.total_minor_time, total_time_ns) * 100;
  double pct_major = average(stats.total_major_time, total_time_ns) * 100;
  double time_scan = total_minor + total_major;
  double pct_scan = average(time_scan, total_time) * 100;
  double time_minus_scan = total_time - time_scan;
  double pct_minus_scan = average(time_minus_scan, total_time) * 100;
  printf(" minor scan time: %'.0fµs (%.2f%%)\n"
         " major scan time: %'.0fµs (%.2f%%)\n"
         " time scanning: %'.0fµs (%.2f%%)\n"
         " time minus scanning: %'.0fµs (%.2f%%)\n",
         total_minor, pct_minor,
         total_major, pct_major,
         time_scan, pct_scan,
         time_minus_scan, pct_minus_scan);
#endif
  /* Build configuration. */
  printf("- BXR_POOL_LOG_SIZE: %d (%'lld KiB, %'d roots/pool)\n"
         " BOXROOT_STATS: %d\n"
         " BOXROOT_DEBUG: %d\n"
         " OCAML_MULTICORE: %d\n"
         " BXR_MULTITHREAD: %d\n"
         " BXR_FORCE_REMOTE: %d\n",
         (int)BXR_POOL_LOG_SIZE, kib_of_pools(1, 1), (int)POOL_CAPACITY,
         (int)BOXROOT_STATS, (int)BOXROOT_DEBUG, (int)OCAML_MULTICORE,
         (int)BXR_MULTITHREAD, (int)BXR_FORCE_REMOTE);
  /* Pool lifecycle counters. */
  printf("- total allocated pools: %'lld (%'lld MiB)\n"
         " peak allocated pools: %'lld (%'lld MiB)\n"
         " total emptied pools: %'lld (%'lld MiB)\n"
         " total freed pools: %'lld (%'lld MiB)\n",
         stats.total_alloced_pools,
         kib_of_pools(stats.total_alloced_pools, 2),
         stats.peak_pools,
         kib_of_pools(stats.peak_pools, 2),
         stats.total_emptied_pools,
         kib_of_pools(stats.total_emptied_pools, 2),
         stats.total_freed_pools,
         kib_of_pools(stats.total_freed_pools, 2));
  /* Scanning work and pace (slots visited per collection / ns per slot). */
  double scanning_work_minor =
    average(stats.total_scanning_work_minor, stats.minor_collections);
  double scanning_work_major =
    average(stats.total_scanning_work_major, stats.major_collections);
  long long total_scanning_work =
    stats.total_scanning_work_minor + stats.total_scanning_work_major;
#if defined(POSIX_CLOCK)
  double scanning_pace =
    average(stats.total_minor_time + stats.total_major_time,
            stats.total_scanning_work_minor + stats.total_scanning_work_major);
  double scanning_pace_minor =
    average(stats.total_minor_time, stats.total_scanning_work_minor);
  double scanning_pace_major =
    average(stats.total_major_time, stats.total_scanning_work_major);
#endif
#if BOXROOT_DEBUG
  double young_hits_gen_pct =
    average(stats.young_hit_gen * 100, stats.total_scanning_work_major);
#endif
  double young_hits_young_pct =
    average(stats.young_hit_young * 100, stats.total_scanning_work_minor);
  /* Note: format string and argument list below must stay under the
     same preprocessor conditions, piece by piece. */
  printf("- work per minor: %'.0f\n"
         " work per major: %'.0f\n"
         " total scanning work: %'lld (%'lld minor, %'lld major)\n"
#if defined(POSIX_CLOCK)
         " total scanning pace (ns per work): %'.2f (%'.2f minor, %'.2f major)\n"
#endif
#if BOXROOT_DEBUG
         " young hits (non-minor collection): %.2f%%\n"
#endif
         " young hits (minor collection): %.2f%%\n",
         scanning_work_minor,
         scanning_work_major,
         total_scanning_work, stats.total_scanning_work_minor, stats.total_scanning_work_major,
#if defined(POSIX_CLOCK)
         scanning_pace, scanning_pace_minor, scanning_pace_major,
#endif
#if BOXROOT_DEBUG
         young_hits_gen_pct,
#endif
         young_hits_young_pct);
#if defined(POSIX_CLOCK)
  double time_per_minor =
    average(stats.total_minor_time, stats.minor_collections) / 1000;
  double time_per_major =
    average(stats.total_major_time, stats.major_collections) / 1000;
  printf("- average scan time per minor: %'.3fµs\n"
         " average scan time per major: %'.3fµs\n"
         " peak time per minor: %'.3fµs\n"
         " peak time per major: %'.3fµs\n",
         time_per_minor,
         time_per_major,
         ((double)stats.peak_minor_time) / 1000,
         ((double)stats.peak_major_time) / 1000);
#endif
  /* Slow-path counters. */
  printf("- total boxroot_create_slow: %'lld\n"
         " total boxroot_modify_slow: %'lld\n"
         " total boxroot_delete_aux local: %'lld\n",
         stats.total_create_slow,
         stats.total_modify_slow,
         stats.total_delete_aux_local);
  double old_reordering_per_minor =
    average(stats.old_reordering, stats.minor_collections);
  double old_reordering_time = (double)stats.old_reordering_time / 1000;
  double old_reordering_time_pct =
    average(stats.old_reordering_time, stats.total_minor_time) * 100;
  printf(" total boxroot_delete_aux remote domain: %'lld\n"
         " total boxroot_delete_aux remote no domain: %'lld\n"
         " old pools reordered: %.2f per minor (%'.3fµs total, %.2f%% minor scan time)\n",
         stats.total_delete_remote_domain,
         stats.total_delete_remote_nolock,
         old_reordering_per_minor, old_reordering_time, old_reordering_time_pct);
#if defined(POSIX_CLOCK)
  double create_slow_time = (double)stats.total_create_slow_time / 1000;
  double pct_create = average(stats.total_create_slow_time, total_time_ns) * 100;
  printf(" boxroot_create_slow time: %'.0fµs (%.2f%%)\n",
         create_slow_time, pct_create);
#endif
#if BOXROOT_DEBUG
  /* Debug-only micro counters. */
  double ring_operations_per_pool =
    average(stats.ring_operations, stats.total_alloced_pools);
  printf("- total ring operations: %'lld\n"
         " ring operations per pool: %.2f\n",
         stats.ring_operations,
         ring_operations_per_pool);
  long long total_create = stats.total_create_young + stats.total_create_old;
  long long total_delete = stats.total_delete_young + stats.total_delete_old;
  double create_young_pct =
    average(stats.total_create_young * 100, total_create);
  double delete_young_pct =
    average(stats.total_delete_young * 100, total_delete);
  printf(" total created: %'lld (%.2f%% young)\n"
         " total deleted: %'lld (%.2f%% young)\n"
         " total modified: %'lld\n",
         total_create, create_young_pct,
         total_delete, delete_young_pct,
         stats.total_modify);
  printf(" is_pool_member: %'lld\n",
         stats.is_pool_member);
#endif
}
/* GC hook: scan all boxroots of the current domain. [only_young]
   requests scanning minor-heap pointers only. */
static void scanning_callback(scanning_action action, int only_young,
                              void *data)
{
  if (boxroot_status() == BOXROOT_NOT_SETUP
      || boxroot_status() == BOXROOT_TORE_DOWN) return;
  bool in_minor_collection = bxr_in_minor_collection();
  if (in_minor_collection) STATS_INCR(minor_collections);
  else STATS_INCR(major_collections);
  int dom_id = Domain_id;
  /* Nothing to scan if boxroot was never used on this domain. */
  if (get_bxr_domain_state(dom_id) == NULL) return;
#if !OCAML_MULTICORE
  if (!bxr_check_thread_hooks()) status = BOXROOT_INVALID;
#endif
  long long start = time_counter();
  scan_roots(action, only_young, data, dom_id);
  long long duration = time_counter() - start;
  if (BOXROOT_STATS) {
    /* "+=" on an _Atomic lvalue is an atomic RMW; the peak update is a
       racy check-then-store — acceptable for statistics. */
    atomic_llong *total = in_minor_collection ? &stats.total_minor_time : &stats.total_major_time;
    atomic_llong *peak = in_minor_collection ? &stats.peak_minor_time : &stats.peak_major_time;
    *total += duration;
    if (duration > *peak) *peak = duration;
  }
}
static void domain_termination_callback()
{
DEBUGassert(OCAML_MULTICORE == 1);
int dom_id = Domain_id;
orphan_pools(dom_id);
}
static mutex_t init_mutex = BXR_MUTEX_INITIALIZER;

/* One-time library initialization; idempotent and thread-safe via
   double-checked locking on the atomic status (fast path checks
   RUNNING without the lock, slow path re-checks under init_mutex).
   Requires an OCaml domain lock (Caml_state_opt != NULL). */
static bool setup()
{
  if (Caml_state_opt == NULL) {
    errno = EPERM;
    return false;
  }
  if (boxroot_status() == BOXROOT_RUNNING) return true;
  bool res = true;
  bxr_mutex_lock(&init_mutex);
  if (status != BOXROOT_NOT_SETUP) {
    /* Another thread completed (or invalidated) setup first. */
    res = (status == BOXROOT_RUNNING);
    goto out;
  }
#if !OCAML_MULTICORE
  bxr_init_systhreads();
#endif
  init_bxr_domain_state();
  bxr_setup_hooks(&scanning_callback, &domain_termination_callback);
  stats.setup_time = time_counter();
  status = BOXROOT_RUNNING;
 out:
  bxr_mutex_unlock(&init_mutex);
  return res;
}
/* Public wrapper around the idempotent internal setup routine. */
bool boxroot_setup(void)
{
  return setup();
}
/* Release all pools and mark the library as torn down; subsequent
   root creation fails. Serialized with setup via init_mutex. */
void boxroot_teardown()
{
  bxr_mutex_lock(&init_mutex);
  if (status != BOXROOT_RUNNING) goto out;
  status = BOXROOT_TORE_DOWN;
  free_domain_state();
  free_pool_rings(&orphan);
 out:
  bxr_mutex_unlock(&init_mutex);
}
/* Human-readable explanation of the last failure, derived from the
   library status and errno. (The local [status] deliberately shadows
   the global atomic of the same name.) */
char const * boxroot_error_string(void)
{
  int status = boxroot_status();
  switch (status) {
  case BOXROOT_TORE_DOWN: return "boxroot_teardown has previously been called";
  case BOXROOT_INVALID: return "Ensure boxroot_setup() is called as documented";
  case BOXROOT_RUNNING:
  case BOXROOT_NOT_SETUP:
    if (errno == EPERM) {
      return "You tried calling boxroot_create, boxroot_modify, or boxroot_setup "
             "without holding the domain lock";
    } else if (errno == ENOMEM) {
      return "Allocation failure of the backing store";
    }
    /* fallthrough: errno not recognized */
  default:
    return "Unknown error";
  }
}