#include "utils/commons.h"
#include "wavefront_backtrace_buffer.h"
#include "wavefront_sequences.h"
#define BT_BUFFER_SEGMENT_LENGTH BUFFER_SIZE_8M
#define BT_BUFFER_SEGMENT_IDX(block_idx) ((block_idx)/BT_BUFFER_SEGMENT_LENGTH)
#define BT_BUFFER_SEGMENT_OFFSET(block_idx) ((block_idx)%BT_BUFFER_SEGMENT_LENGTH)
#define BT_BUFFER_IDX(segment_idx,segment_offset) \
((segment_idx)*BT_BUFFER_SEGMENT_LENGTH) + (segment_offset)
void wf_backtrace_buffer_segment_add(
wf_backtrace_buffer_t* const bt_buffer) {
bt_block_t* const bt_segment = mm_allocator_calloc(
bt_buffer->mm_allocator,BT_BUFFER_SEGMENT_LENGTH,bt_block_t,false);
vector_insert(bt_buffer->segments,bt_segment,bt_block_t*);
}
void wf_backtrace_buffer_segment_reserve(
wf_backtrace_buffer_t* const bt_buffer) {
bt_buffer->segment_offset = 0;
++(bt_buffer->segment_idx);
if (bt_buffer->segment_idx >= vector_get_used(bt_buffer->segments)) {
const uint64_t block_idx = ((uint64_t)bt_buffer->segment_idx+1) * BT_BUFFER_SEGMENT_LENGTH;
if (block_idx >= BT_BLOCK_IDX_MAX) {
fprintf(stderr,"[WFA::BacktraceBuffer] Reached maximum addressable index"); exit(-1);
}
wf_backtrace_buffer_segment_add(bt_buffer);
}
bt_block_t** const segments = vector_get_mem(bt_buffer->segments,bt_block_t*);
bt_buffer->block_next = segments[bt_buffer->segment_idx];
}
wf_backtrace_buffer_t* wf_backtrace_buffer_new(
mm_allocator_t* const mm_allocator) {
wf_backtrace_buffer_t* const bt_buffer =
mm_allocator_alloc(mm_allocator,wf_backtrace_buffer_t);
bt_buffer->mm_allocator = mm_allocator;
bt_buffer->segment_idx = 0;
bt_buffer->segment_offset = 0;
bt_buffer->segments = vector_new(10,bt_block_t*);
wf_backtrace_buffer_segment_add(bt_buffer); bt_buffer->block_next = vector_get_mem(bt_buffer->segments,bt_block_t*)[0];
bt_buffer->num_compacted_blocks = 0;
bt_buffer->num_compactions = 0;
bt_buffer->alignment_init_pos = vector_new(100,wf_backtrace_init_pos_t);
bt_buffer->alignment_packed = vector_new(100,pcigar_t);
bt_buffer->prefetch_blocks_idxs = vector_new(500,bt_block_idx_t);
return bt_buffer;
}
void wf_backtrace_buffer_clear(
wf_backtrace_buffer_t* const bt_buffer) {
bt_buffer->segment_idx = 0;
bt_buffer->segment_offset = 0;
bt_buffer->block_next = vector_get_mem(bt_buffer->segments,bt_block_t*)[0];
bt_buffer->num_compacted_blocks = 0;
bt_buffer->num_compactions = 0;
vector_clear(bt_buffer->alignment_init_pos);
}
void wf_backtrace_buffer_reap(
wf_backtrace_buffer_t* const bt_buffer) {
const int num_segments = vector_get_used(bt_buffer->segments);
bt_block_t** const segments = vector_get_mem(bt_buffer->segments,bt_block_t*);
int i;
for (i=1;i<num_segments;++i) {
mm_allocator_free(bt_buffer->mm_allocator,segments[i]);
}
vector_set_used(bt_buffer->segments,1);
bt_buffer->segment_idx = 0;
bt_buffer->segment_offset = 0;
bt_buffer->block_next = vector_get_mem(bt_buffer->segments,bt_block_t*)[0];
bt_buffer->num_compacted_blocks = 0;
bt_buffer->num_compactions = 0;
}
void wf_backtrace_buffer_delete(
wf_backtrace_buffer_t* const bt_buffer) {
const int num_segments = vector_get_used(bt_buffer->segments);
bt_block_t** const segments = vector_get_mem(bt_buffer->segments,bt_block_t*);
int i;
for (i=0;i<num_segments;++i) {
mm_allocator_free(bt_buffer->mm_allocator,segments[i]);
}
vector_delete(bt_buffer->segments);
vector_delete(bt_buffer->alignment_init_pos);
vector_delete(bt_buffer->alignment_packed);
vector_delete(bt_buffer->prefetch_blocks_idxs);
mm_allocator_free(bt_buffer->mm_allocator,bt_buffer);
}
uint64_t wf_backtrace_buffer_get_used(
wf_backtrace_buffer_t* const bt_buffer) {
const bt_block_idx_t max_block_idx = BT_BUFFER_IDX(bt_buffer->segment_idx,bt_buffer->segment_offset);
return max_block_idx;
}
bt_block_idx_t wf_backtrace_buffer_get_num_compacted_blocks(
wf_backtrace_buffer_t* const bt_buffer) {
return bt_buffer->num_compacted_blocks;
}
void wf_backtrace_buffer_set_num_compacted_blocks(
wf_backtrace_buffer_t* const bt_buffer,
const bt_block_idx_t num_compacted_blocks) {
bt_buffer->num_compacted_blocks = num_compacted_blocks;
}
void wf_backtrace_buffer_reset_compaction(
wf_backtrace_buffer_t* const bt_buffer) {
bt_buffer->num_compactions = 0;
bt_buffer->num_compacted_blocks = 0;
}
uint64_t wf_backtrace_buffer_get_size_allocated(
wf_backtrace_buffer_t* const bt_buffer) {
const uint64_t segments_used = vector_get_used(bt_buffer->segments);
return segments_used*BT_BUFFER_SEGMENT_LENGTH*sizeof(bt_block_t);
}
uint64_t wf_backtrace_buffer_get_size_used(
wf_backtrace_buffer_t* const bt_buffer) {
const bt_block_idx_t max_block_idx = BT_BUFFER_IDX(bt_buffer->segment_idx,bt_buffer->segment_offset);
return max_block_idx*sizeof(bt_block_t);
}
void wf_backtrace_buffer_prefetch_block(
wf_backtrace_buffer_t* const bt_buffer,
const bt_block_idx_t block_idx) {
const int segment_idx = BT_BUFFER_SEGMENT_IDX(block_idx);
const int segment_offset = BT_BUFFER_SEGMENT_OFFSET(block_idx);
bt_block_t** const segments = vector_get_mem(bt_buffer->segments,bt_block_t*);
PREFETCH(segments[segment_idx]+segment_offset);
}
bt_block_t* wf_backtrace_buffer_get_block(
wf_backtrace_buffer_t* const bt_buffer,
const bt_block_idx_t block_idx) {
const int segment_idx = BT_BUFFER_SEGMENT_IDX(block_idx);
const int segment_offset = BT_BUFFER_SEGMENT_OFFSET(block_idx);
bt_block_t** const segments = vector_get_mem(bt_buffer->segments,bt_block_t*);
return &(segments[segment_idx][segment_offset]);
}
void wf_backtrace_buffer_add_used(
wf_backtrace_buffer_t* const bt_buffer,
const int used) {
bt_buffer->segment_offset += used;
bt_buffer->block_next += used;
if (bt_buffer->segment_offset >= BT_BUFFER_SEGMENT_LENGTH) {
wf_backtrace_buffer_segment_reserve(bt_buffer);
}
}
bt_block_idx_t wf_backtrace_buffer_get_mem(
wf_backtrace_buffer_t* const bt_buffer,
bt_block_t** const bt_block_mem,
int* const bt_blocks_available) {
const int segment_idx = bt_buffer->segment_idx;
const int segment_offset = bt_buffer->segment_offset;
*bt_block_mem = bt_buffer->block_next;
*bt_blocks_available = BT_BUFFER_SEGMENT_LENGTH - bt_buffer->segment_offset;
return BT_BUFFER_IDX(segment_idx,segment_offset);
}
void wf_backtrace_buffer_store_block(
wf_backtrace_buffer_t* const bt_buffer,
const pcigar_t pcigar,
const bt_block_idx_t prev_idx) {
bt_buffer->block_next->pcigar = pcigar;
bt_buffer->block_next->prev_idx = prev_idx;
++(bt_buffer->block_next);
++(bt_buffer->segment_offset);
if (bt_buffer->segment_offset >= BT_BUFFER_SEGMENT_LENGTH) {
wf_backtrace_buffer_segment_reserve(bt_buffer);
}
}
bt_block_idx_t wf_backtrace_buffer_init_block(
wf_backtrace_buffer_t* const bt_buffer,
const int v,
const int h) {
const int segment_idx = bt_buffer->segment_idx;
const int segment_offset = bt_buffer->segment_offset;
const int init_position_offset = vector_get_used(bt_buffer->alignment_init_pos);
wf_backtrace_init_pos_t init_pos = { .v = v, .h = h };
vector_insert(bt_buffer->alignment_init_pos,init_pos,wf_backtrace_init_pos_t);
wf_backtrace_buffer_store_block(bt_buffer,init_position_offset,BT_BLOCK_IDX_NULL);
return BT_BUFFER_IDX(segment_idx,segment_offset);
}
bt_block_t* wf_backtrace_buffer_traceback_pcigar(
wf_backtrace_buffer_t* const bt_buffer,
bt_block_t* bt_block) {
vector_t* const alignment_packed = bt_buffer->alignment_packed;
vector_clear(alignment_packed);
while (bt_block->prev_idx != BT_BLOCK_IDX_NULL) {
vector_insert(alignment_packed,bt_block->pcigar,pcigar_t);
const bt_block_idx_t prev_idx = bt_block->prev_idx;
bt_block = wf_backtrace_buffer_get_block(bt_buffer,prev_idx);
}
return bt_block;
}
void wf_backtrace_buffer_unpack_cigar_linear(
wf_backtrace_buffer_t* const bt_buffer,
wavefront_sequences_t* const sequences,
const int begin_v,
const int begin_h,
const int end_v,
const int end_h,
cigar_t* const cigar) {
const int pattern_length = sequences->pattern_length;
const int text_length = sequences->text_length;
cigar_clear(cigar);
char* cigar_buffer = cigar->operations;
int i;
int v = begin_v;
int h = begin_h;
for (i=0;i<h;++i) {*cigar_buffer = 'I'; ++cigar_buffer;};
for (i=0;i<v;++i) {*cigar_buffer = 'D'; ++cigar_buffer;};
const int num_palignment_blocks = vector_get_used(bt_buffer->alignment_packed);
pcigar_t* const palignment_blocks = vector_get_mem(bt_buffer->alignment_packed,pcigar_t);
for (i=num_palignment_blocks-1;i>=0;--i) {
int cigar_block_length = 0;
pcigar_unpack_linear(
palignment_blocks[i],sequences,&v,&h,
cigar_buffer,&cigar_block_length);
cigar_buffer += cigar_block_length;
}
const int num_matches = MIN(end_v-v,end_h-h);
for (i=0;i<num_matches;++i) {*cigar_buffer = 'M'; ++cigar_buffer;};
v += num_matches;
h += num_matches;
while (h < text_length) {*cigar_buffer = 'I'; ++cigar_buffer; ++h;};
while (v < pattern_length) {*cigar_buffer = 'D'; ++cigar_buffer; ++v;};
*cigar_buffer = '\0';
cigar->end_offset = cigar_buffer - cigar->operations;
}
void wf_backtrace_buffer_unpack_cigar_affine(
wf_backtrace_buffer_t* const bt_buffer,
wavefront_sequences_t* const sequences,
const int begin_v,
const int begin_h,
const int end_v,
const int end_h,
cigar_t* const cigar) {
const int pattern_length = sequences->pattern_length;
const int text_length = sequences->text_length;
cigar_clear(cigar);
char* cigar_buffer = cigar->operations;
int i;
int v = begin_v;
int h = begin_h;
for (i=0;i<h;++i) {*cigar_buffer = 'I'; ++cigar_buffer;};
for (i=0;i<v;++i) {*cigar_buffer = 'D'; ++cigar_buffer;};
const int num_palignment_blocks = vector_get_used(bt_buffer->alignment_packed);
pcigar_t* const palignment_blocks = vector_get_mem(bt_buffer->alignment_packed,pcigar_t);
affine_matrix_type current_matrix_type = affine_matrix_M;
for (i=num_palignment_blocks-1;i>=0;--i) {
int cigar_block_length = 0;
pcigar_unpack_affine(
palignment_blocks[i],sequences,&v,&h,
cigar_buffer,&cigar_block_length,¤t_matrix_type);
cigar_buffer += cigar_block_length;
}
const int num_matches = MIN(end_v-v,end_h-h);
for (i=0;i<num_matches;++i) {*cigar_buffer = 'M'; ++cigar_buffer;};
v += num_matches;
h += num_matches;
while (h < text_length) {*cigar_buffer = 'I'; ++cigar_buffer; ++h;};
while (v < pattern_length) {*cigar_buffer = 'D'; ++cigar_buffer; ++v;};
*cigar_buffer = '\0';
cigar->end_offset = cigar_buffer - cigar->operations;
}
void wf_backtrace_buffer_mark_backtrace(
wf_backtrace_buffer_t* const bt_buffer,
const bt_block_idx_t bt_block_idx,
bitmap_t* const bitmap) {
const bt_block_idx_t num_compacted_blocks = bt_buffer->num_compacted_blocks;
bt_block_t bt_block_last = { .prev_idx = bt_block_idx };
bt_block_t* bt_block = &bt_block_last;
while (bt_block->prev_idx != BT_BLOCK_IDX_NULL &&
bt_block->prev_idx >= num_compacted_blocks &&
!bitmap_check__set(bitmap,bt_block->prev_idx)) {
const bt_block_idx_t prev_idx = bt_block->prev_idx;
bt_block = wf_backtrace_buffer_get_block(bt_buffer,prev_idx);
}
}
void wf_backtrace_buffer_mark_backtrace_batch(
wf_backtrace_buffer_t* const bt_buffer,
wf_offset_t* const offsets,
bt_block_idx_t* const bt_block_idxs,
const int num_block_idxs,
bitmap_t* const bitmap) {
const bt_block_idx_t num_compacted_blocks = bt_buffer->num_compacted_blocks;
const int max_batch_size = 100;
vector_reserve(bt_buffer->prefetch_blocks_idxs,max_batch_size,false);
bt_block_idx_t* const pf_block_idx = vector_get_mem(bt_buffer->prefetch_blocks_idxs,bt_block_idx_t);
int active_blocks = 0, next_idx = 0;
while (active_blocks < max_batch_size && next_idx < num_block_idxs) {
const bt_block_idx_t block_idx = bt_block_idxs[next_idx];
if (offsets[next_idx] >= 0 &&
block_idx >= num_compacted_blocks) { BITMAP_PREFETCH_BLOCK(bitmap,block_idx);
wf_backtrace_buffer_prefetch_block(bt_buffer,block_idx);
pf_block_idx[active_blocks] = block_idx;
++active_blocks;
}
++next_idx; }
int i = 0;
while (active_blocks > 0) {
const bt_block_idx_t block_idx = pf_block_idx[i];
BITMAP_GET_BLOCK(bitmap,block_idx,block_bm_ptr);
if (!BM_BLOCK_IS_SET(*block_bm_ptr,block_idx)) {
BM_BLOCK_SET(*block_bm_ptr,block_idx);
bt_block_t* const bt_block = wf_backtrace_buffer_get_block(bt_buffer,block_idx);
const bt_block_idx_t prev_block_idx = bt_block->prev_idx;
if (prev_block_idx != BT_BLOCK_IDX_NULL &&
prev_block_idx >= num_compacted_blocks) {
pf_block_idx[i] = prev_block_idx;
BITMAP_PREFETCH_BLOCK(bitmap,prev_block_idx);
wf_backtrace_buffer_prefetch_block(bt_buffer,prev_block_idx);
i = (i+1) % active_blocks;
continue;
}
}
while (true ) {
if (next_idx < num_block_idxs) {
if (offsets[next_idx] < 0 || bt_block_idxs[next_idx] < num_compacted_blocks) {
++next_idx; continue;
}
const bt_block_idx_t block_idx = bt_block_idxs[next_idx];
BITMAP_PREFETCH_BLOCK(bitmap,block_idx);
wf_backtrace_buffer_prefetch_block(bt_buffer,block_idx);
pf_block_idx[i] = block_idx;
++next_idx;
i = (i+1) % active_blocks;
break;
} else {
--active_blocks;
pf_block_idx[i] = pf_block_idx[active_blocks];
if (active_blocks) i = (i+1) % active_blocks;
break;
}
}
}
}
bt_block_idx_t wf_backtrace_buffer_compact_marked(
wf_backtrace_buffer_t* const bt_buffer,
bitmap_t* const bitmap,
const int verbose) {
const int num_segments = vector_get_used(bt_buffer->segments);
bt_block_t** const segments = vector_get_mem(bt_buffer->segments,bt_block_t*);
const bt_block_idx_t num_compacted_blocks = bt_buffer->num_compacted_blocks;
bt_block_idx_t read_global_pos = num_compacted_blocks;
bt_block_idx_t write_global_pos = num_compacted_blocks;
bt_block_idx_t read_segidx = BT_BUFFER_SEGMENT_IDX(read_global_pos);
bt_block_idx_t read_offset = BT_BUFFER_SEGMENT_OFFSET(read_global_pos);
bt_block_idx_t write_segidx = BT_BUFFER_SEGMENT_IDX(write_global_pos);
bt_block_idx_t write_offset = BT_BUFFER_SEGMENT_OFFSET(write_global_pos);
bt_block_t* read_block = segments[read_segidx] + read_offset;
bt_block_t* write_block = segments[write_segidx] + write_offset;
const bt_block_idx_t max_block_idx = BT_BUFFER_IDX(bt_buffer->segment_idx,bt_buffer->segment_offset);
while (read_global_pos < max_block_idx) {
BITMAP_GET_BLOCK(bitmap,read_global_pos,block_bitmap_ptr);
if (BM_BLOCK_IS_SET(*block_bitmap_ptr,read_global_pos)) {
write_block->pcigar = read_block->pcigar;
if (read_block->prev_idx == BT_BLOCK_IDX_NULL ||
read_block->prev_idx < num_compacted_blocks) {
write_block->prev_idx = read_block->prev_idx;
} else {
write_block->prev_idx = num_compacted_blocks + bitmap_erank(bitmap,read_block->prev_idx);
}
++write_offset; ++write_block; ++write_global_pos;
if (write_offset >= BT_BUFFER_SEGMENT_LENGTH) {
write_block = segments[++write_segidx];
write_offset = 0;
}
}
++read_offset; ++read_block; ++read_global_pos;
if (read_offset >= BT_BUFFER_SEGMENT_LENGTH) {
if (++read_segidx >= num_segments) break;
read_block = segments[read_segidx];
read_offset = 0;
}
}
bt_buffer->segment_offset = write_offset;
bt_buffer->segment_idx = write_segidx;
bt_buffer->block_next = write_block;
bt_buffer->num_compactions++;
if (verbose >= 3) {
fprintf(stderr,"[WFA::BacktraceBuffer] Compacted from %lu MB to %lu MB (%2.2f%%)",
CONVERT_B_TO_MB(read_global_pos*sizeof(bt_block_t)),
CONVERT_B_TO_MB(write_global_pos*sizeof(bt_block_t)),
100.0f*(float)write_global_pos/(float)read_global_pos);
}
return write_global_pos - 1;
}