#include "utils/commons.h"
#include "system/mm_allocator.h"
#include "wavefront_pcigar.h"
typedef struct {
char operation;
int inc_v;
int inc_h;
affine_matrix_type matrix_type;
} pcigar_op_t;
pcigar_op_t pcigar_lut[4] = {
{ .operation = '?', .inc_v = 0, .inc_h = 0, .matrix_type = affine_matrix_M }, { .operation = 'D', .inc_v = 1, .inc_h = 0, .matrix_type = affine_matrix_D }, { .operation = 'X', .inc_v = 1, .inc_h = 1, .matrix_type = affine_matrix_M }, { .operation = 'I', .inc_v = 0, .inc_h = 1, .matrix_type = affine_matrix_I }, };
char matches_lut[8] = "MMMMMMMM";
#define CIGAR_8MATCHES_UINT64 *((uint64_t*)matches_lut)
int pcigar_get_length(
const pcigar_t pcigar) {
int cigar_length = PCIGAR_MAX_LENGTH;
if (!PCIGAR_IS_UTILISED(pcigar,PCIGAR_FULL_MASK)) {
const int free_slots = PCIGAR_FREE_SLOTS(pcigar);
cigar_length -= free_slots;
}
return cigar_length;
}
int pcigar_unpack(
pcigar_t pcigar,
char* cigar_buffer) {
int pcigar_length = PCIGAR_MAX_LENGTH;
if (!PCIGAR_IS_UTILISED(pcigar,PCIGAR_FULL_MASK)) {
const int free_slots = PCIGAR_FREE_SLOTS(pcigar);
pcigar_length -= free_slots;
pcigar <<= free_slots*2;
}
int i;
for (i=0;i<pcigar_length;++i) {
const int cigar_op = (int)PCIGAR_EXTRACT(pcigar); PCIGAR_POP_FRONT(pcigar); pcigar_op_t* const op = pcigar_lut + cigar_op;
*(cigar_buffer++) = op->operation;
}
return pcigar_length;
}
int pcigar_unpack_extend(
const char* const pattern,
const int pattern_length,
const char* const text,
const int text_length,
int v,
int h,
char* cigar_buffer) {
int num_matches = 0;
uint64_t* pattern_blocks = (uint64_t*)(pattern+v);
uint64_t* text_blocks = (uint64_t*)(text+h);
uint64_t pattern_block = *pattern_blocks;
uint64_t text_block = *text_blocks;
uint64_t cmp = pattern_block ^ text_block;
while (cmp==0 && (v+8) < pattern_length && (h+8) < text_length) {
v += 8;
h += 8;
num_matches += 8;
*((uint64_t*)cigar_buffer) = CIGAR_8MATCHES_UINT64;
cigar_buffer += 8;
++pattern_blocks;
++text_blocks;
pattern_block = *pattern_blocks;
text_block = *text_blocks;
cmp = pattern_block ^ text_block;
}
num_matches += __builtin_ctzl(cmp)/8;
*((uint64_t*)cigar_buffer) = CIGAR_8MATCHES_UINT64;
return num_matches;
}
int pcigar_unpack_extend_custom(
const int pattern_length,
const int text_length,
alignment_match_funct_t const match_funct,
void* const match_funct_arguments,
int v,
int h,
char* cigar_buffer) {
int num_matches = 0;
while (v < pattern_length && h < text_length) {
if (!match_funct(v,h,match_funct_arguments)) break;
++v; ++h;
*cigar_buffer = 'M';
++cigar_buffer;
++num_matches;
}
return num_matches;
}
void pcigar_unpack_linear(
pcigar_t pcigar,
wavefront_sequences_t* const sequences,
int* const v_pos,
int* const h_pos,
char* cigar_buffer,
int* const cigar_length) {
char* const pattern = sequences->pattern;
const int pattern_length = sequences->pattern_length;
char* const text = sequences->text;
const int text_length = sequences->text_length;
char* const cigar_buffer_base = cigar_buffer;
int pcigar_length = PCIGAR_MAX_LENGTH;
if (!PCIGAR_IS_UTILISED(pcigar,PCIGAR_FULL_MASK)) {
const int free_slots = PCIGAR_FREE_SLOTS(pcigar);
pcigar_length -= free_slots;
pcigar <<= free_slots*2;
}
int v = *v_pos, h = *h_pos, i;
for (i=0;i<pcigar_length;++i) {
int num_matches;
if (sequences->mode == wf_sequences_lambda) { num_matches = pcigar_unpack_extend_custom(
pattern_length,text_length,sequences->match_funct,
sequences->match_funct_arguments,v,h,cigar_buffer);
} else {
num_matches = pcigar_unpack_extend(
pattern,pattern_length,text,text_length,v,h,cigar_buffer);
}
v += num_matches;
h += num_matches;
cigar_buffer += num_matches;
const int cigar_op = (int)PCIGAR_EXTRACT(pcigar); PCIGAR_POP_FRONT(pcigar); pcigar_op_t* const op = pcigar_lut + cigar_op;
*(cigar_buffer++) = op->operation;
v += op->inc_v;
h += op->inc_h;
}
*cigar_length = cigar_buffer - cigar_buffer_base;
*v_pos = v;
*h_pos = h;
}
void pcigar_unpack_affine(
pcigar_t pcigar,
wavefront_sequences_t* const sequences,
int* const v_pos,
int* const h_pos,
char* cigar_buffer,
int* const cigar_length,
affine_matrix_type* const current_matrix_type) {
char* const pattern = sequences->pattern;
const int pattern_length = sequences->pattern_length;
char* const text = sequences->text;
const int text_length = sequences->text_length;
char* const cigar_buffer_base = cigar_buffer;
int pcigar_length = PCIGAR_MAX_LENGTH;
if (!PCIGAR_IS_UTILISED(pcigar,PCIGAR_FULL_MASK)) {
const int free_slots = PCIGAR_FREE_SLOTS(pcigar);
pcigar_length -= free_slots;
pcigar <<= free_slots*2;
}
affine_matrix_type matrix_type = *current_matrix_type;
int v = *v_pos, h = *h_pos, i;
for (i=0;i<pcigar_length;++i) {
if (matrix_type == affine_matrix_M) { int num_matches;
if (sequences->mode == wf_sequences_lambda) { num_matches = pcigar_unpack_extend_custom(
pattern_length,text_length,sequences->match_funct,
sequences->match_funct_arguments,v,h,cigar_buffer);
} else {
num_matches = pcigar_unpack_extend(
pattern,pattern_length,text,text_length,v,h,cigar_buffer);
}
v += num_matches;
h += num_matches;
cigar_buffer += num_matches;
}
const int cigar_op = (int)PCIGAR_EXTRACT(pcigar); PCIGAR_POP_FRONT(pcigar); pcigar_op_t* const op = pcigar_lut + cigar_op;
if (matrix_type != affine_matrix_M && op->operation=='X') {
matrix_type = affine_matrix_M;
continue;
}
*(cigar_buffer++) = op->operation;
v += op->inc_v;
h += op->inc_h;
matrix_type = op->matrix_type;
}
*cigar_length = cigar_buffer - cigar_buffer_base;
*v_pos = v;
*h_pos = h;
*current_matrix_type = matrix_type;
}
void pcigar_print(
FILE* const stream,
pcigar_t pcigar) {
char cigar_buffer[64];
const int pcigar_length = pcigar_unpack(pcigar,cigar_buffer);
cigar_buffer[pcigar_length] = '\0';
fprintf(stream,"%s",cigar_buffer);
}