#include "wavefront_sequences.h"
/*
 * Internal buffer layout constants
 */
// Number of padding bytes reserved in the internal buffer: one run before the
// pattern, one between pattern and text, and one after the text (see the
// buffer-size computation in wavefront_sequences_init_allocate)
#define WF_SEQUENCES_PADDING 64
// Sentinel stored right after the last character of the pattern
#define WF_SEQUENCES_PATTERN_EOS '!'
// Sentinel stored right after the last character of the text. It differs from
// the pattern sentinel, so the two sentinels never compare equal to each other
#define WF_SEQUENCES_TEXT_EOS '?'
/*
 * Setup: put a sequences container into its initial, empty state.
 * No memory is reserved here; the internal buffer is created on demand by
 * wavefront_sequences_init_allocate().
 *
 *   @param wf_sequences  Container to initialize (must point to valid storage)
 */
void wavefront_sequences_allocate(
    wavefront_sequences_t* const wf_sequences) {
  // No backing storage yet
  wf_sequences->seq_buffer_allocated = 0;
  wf_sequences->seq_buffer = NULL;
  // No sequences attached yet
  wf_sequences->text = NULL;
  wf_sequences->pattern = NULL;
  // Default configuration: plain ASCII sequences, forward orientation
  wf_sequences->reverse = false;
  wf_sequences->mode = wf_sequences_ascii;
}
/*
 * Release the internal sequence buffer owned by the container.
 *
 * After the call the container holds no dangling pointers: the buffer pointer
 * is cleared and the recorded capacity is reset, so calling this function
 * twice is harmless and a later wavefront_sequences_init_allocate() will
 * allocate a fresh buffer instead of trusting a stale capacity.
 *
 *   @param wf_sequences  Container whose buffer is released (the struct itself
 *                        is not freed)
 */
void wavefront_sequences_free(
    wavefront_sequences_t* const wf_sequences) {
  // free(NULL) is a no-op, so no guard is required
  free(wf_sequences->seq_buffer);
  wf_sequences->seq_buffer = NULL;
  wf_sequences->seq_buffer_allocated = 0;
  // These views may have pointed into the released buffer
  wf_sequences->pattern = NULL;
  wf_sequences->text = NULL;
}
/*
 * Make sure the internal buffer can hold both sequences plus padding, and
 * place the pattern/text regions inside it.
 *
 * Layout of the internal buffer:
 *   [PADDING][pattern (pattern_length)][PADDING][text (text_length)][PADDING]
 *
 * The buffer is only reallocated when the current capacity is too small; in
 * that case it grows to 1.5x the required size and is zero-filled. Previous
 * contents are discarded (not copied).
 *
 * If the allocation fails the process is aborted: this function cannot report
 * an error to its caller, and every caller writes into the buffer right away.
 *
 *   @param wf_sequences    Container owning the internal buffer
 *   @param pattern_length  Number of characters of the pattern
 *   @param text_length     Number of characters of the text
 */
void wavefront_sequences_init_allocate(
    wavefront_sequences_t* const wf_sequences,
    const int pattern_length,
    const int text_length) {
  // Compute the required size (sequences plus three padding runs)
  const int buffer_size = pattern_length + text_length + 3*WF_SEQUENCES_PADDING;
  // Grow the internal buffer if it is too small
  if (wf_sequences->seq_buffer_allocated < buffer_size) {
    // Drop the old buffer (free(NULL) is a no-op) and forget its capacity
    free(wf_sequences->seq_buffer);
    wf_sequences->seq_buffer = NULL;
    wf_sequences->seq_buffer_allocated = 0;
    // Allocate with 50% slack to reduce future reallocations
    const int proposed_size = buffer_size + buffer_size/2;
    wf_sequences->seq_buffer = calloc(proposed_size,1);
    if (wf_sequences->seq_buffer == NULL) {
      // Out of memory: fail loudly here rather than writing through NULL later
      abort();
    }
    // Record the capacity only once the allocation is known to be valid
    wf_sequences->seq_buffer_allocated = proposed_size;
  }
  // Assign the pattern/text regions
  wf_sequences->pattern_buffer = wf_sequences->seq_buffer + WF_SEQUENCES_PADDING;
  wf_sequences->text_buffer = wf_sequences->seq_buffer + WF_SEQUENCES_PADDING + pattern_length + WF_SEQUENCES_PADDING;
}
/*
 * Copy an ASCII sequence into a destination buffer, optionally reversed, and
 * terminate it with a sentinel character.
 *
 *   @param buffer_dst       Destination; must have room for sequence_length+1 chars
 *   @param sequence         Source characters (not modified)
 *   @param sequence_length  Number of characters to copy
 *   @param padding_length   Not used by this function
 *   @param padding_value    Sentinel stored at buffer_dst[sequence_length]
 *   @param reverse          If true, the sequence is stored back-to-front
 */
void wavefront_sequences_init_copy(
    char* const buffer_dst,
    const char* const sequence,
    const int sequence_length,
    const int padding_length,
    const char padding_value,
    const bool reverse) {
  if (!reverse) {
    // Forward orientation: plain block copy
    memcpy(buffer_dst,sequence,sequence_length);
  } else {
    // Reverse orientation: walk the source backwards while filling forwards
    int src_pos = sequence_length-1;
    int dst_pos = 0;
    while (src_pos >= 0) {
      buffer_dst[dst_pos] = sequence[src_pos];
      ++dst_pos;
      --src_pos;
    }
  }
  // Sentinel right after the last copied character
  buffer_dst[sequence_length] = padding_value;
}
/*
 * Decode a 2-bit packed DNA sequence into ASCII, optionally reversed, and
 * terminate it with a sentinel character.
 *
 * Packing: each byte of `sequence` holds 4 bases, 2 bits per base, starting
 * at the least-significant bits (codes 0,1,2,3 decode to 'A','C','G','T').
 * Decoding sequence_length bases therefore reads ceil(sequence_length/4) bytes.
 *
 * Only buffer_dst[0..sequence_length] is written: when sequence_length is not
 * a multiple of 4, the unused bit-pairs of the last byte are ignored instead
 * of being written outside the destination range.
 *
 *   @param buffer_dst       Destination; must have room for sequence_length+1 chars
 *   @param sequence         Packed source bytes (not modified)
 *   @param sequence_length  Number of bases to decode
 *   @param padding_length   Not used by this function
 *   @param padding_value    Sentinel stored at buffer_dst[sequence_length]
 *   @param reverse          If true, the decoded sequence is stored back-to-front
 */
void wavefront_sequences_init_decode2bits(
    char* const buffer_dst,
    const uint8_t* const sequence,
    const int sequence_length,
    const int padding_length,
    const char padding_value,
    const bool reverse) {
  // Parameters
  static const char dna_packed2bits_decode[4] = {'A','C','G','T'};
  (void)padding_length; // Kept for interface compatibility
  // Decode base by base (4 bases per packed byte)
  int base_num;
  for (base_num=0;base_num<sequence_length;++base_num) {
    const uint8_t word = sequence[base_num/4];
    const int shift = 2*(base_num%4);
    const char letter = dna_packed2bits_decode[(word>>shift) & 3];
    // Base i lands at position i (forward) or at its mirrored position (reverse)
    const int buffer_pos = (reverse) ? sequence_length-1-base_num : base_num;
    buffer_dst[buffer_pos] = letter;
  }
  // Sentinel right after the last decoded base
  buffer_dst[sequence_length] = padding_value;
}
/*
 * Load a pair of ASCII sequences into the container.
 *
 * Both sequences are copied into the internal buffer (reversed if requested),
 * each followed by its sentinel, and the working views are set to cover the
 * complete sequences.
 *
 *   @param wf_sequences    Container to fill
 *   @param pattern         Pattern characters
 *   @param pattern_length  Number of pattern characters
 *   @param text            Text characters
 *   @param text_length     Number of text characters
 *   @param reverse         If true, both sequences are stored back-to-front
 */
void wavefront_sequences_init_ascii(
    wavefront_sequences_t* const wf_sequences,
    const char* const pattern,
    const int pattern_length,
    const char* const text,
    const int text_length,
    const bool reverse) {
  // Configuration
  wf_sequences->reverse = reverse;
  wf_sequences->mode = wf_sequences_ascii;
  // Reserve room and locate the pattern/text regions
  wavefront_sequences_init_allocate(wf_sequences,pattern_length,text_length);
  // Copy the pattern, then the text, each terminated by its own sentinel
  wavefront_sequences_init_copy(
      wf_sequences->pattern_buffer,pattern,pattern_length,
      WF_SEQUENCES_PADDING,WF_SEQUENCES_PATTERN_EOS,reverse);
  wavefront_sequences_init_copy(
      wf_sequences->text_buffer,text,text_length,
      WF_SEQUENCES_PADDING,WF_SEQUENCES_TEXT_EOS,reverse);
  // Remember how much of each region is in use
  wf_sequences->pattern_buffer_length = pattern_length;
  wf_sequences->text_buffer_length = text_length;
  // Pattern view: the whole pattern
  wf_sequences->pattern_begin = 0;
  wf_sequences->pattern_length = pattern_length;
  wf_sequences->pattern = wf_sequences->pattern_buffer;
  wf_sequences->pattern_eos = wf_sequences->pattern[pattern_length];
  // Text view: the whole text
  wf_sequences->text_begin = 0;
  wf_sequences->text_length = text_length;
  wf_sequences->text = wf_sequences->text_buffer;
  wf_sequences->text_eos = wf_sequences->text[text_length];
}
/*
 * Configure the container for lambda mode, where characters are never stored
 * and matches are decided by a user-supplied function.
 *
 *   @param wf_sequences           Container to configure
 *   @param match_funct            Callback invoked by wavefront_sequences_cmp()
 *   @param match_funct_arguments  Opaque pointer handed back to the callback
 *   @param pattern_length         Length of the (virtual) pattern
 *   @param text_length            Length of the (virtual) text
 *   @param reverse                If true, positions are mirrored when comparing
 */
void wavefront_sequences_init_lambda(
    wavefront_sequences_t* const wf_sequences,
    alignment_match_funct_t match_funct,
    void* match_funct_arguments,
    const int pattern_length,
    const int text_length,
    const bool reverse) {
  // Configuration
  wf_sequences->reverse = reverse;
  wf_sequences->mode = wf_sequences_lambda;
  // Matching callback and its context
  wf_sequences->match_funct_arguments = match_funct_arguments;
  wf_sequences->match_funct = match_funct;
  // Pattern: no characters stored, only its extent
  wf_sequences->pattern = NULL;
  wf_sequences->pattern_length = pattern_length;
  wf_sequences->pattern_begin = 0;
  // Text: no characters stored, only its extent
  wf_sequences->text = NULL;
  wf_sequences->text_length = text_length;
  wf_sequences->text_begin = 0;
}
/*
 * Load a pair of 2-bit packed sequences into the container.
 *
 * Both sequences are decoded to ASCII into the internal buffer (reversed if
 * requested), each followed by its sentinel. Because the stored form is ASCII,
 * the container is put in ASCII mode.
 *
 *   @param wf_sequences    Container to fill
 *   @param pattern         Packed pattern bytes
 *   @param pattern_length  Number of pattern bases
 *   @param text            Packed text bytes
 *   @param text_length     Number of text bases
 *   @param reverse         If true, both sequences are stored back-to-front
 */
void wavefront_sequences_init_packed2bits(
    wavefront_sequences_t* const wf_sequences,
    const uint8_t* const pattern,
    const int pattern_length,
    const uint8_t* const text,
    const int text_length,
    const bool reverse) {
  // Configuration (decoded sequences are handled as ASCII)
  wf_sequences->reverse = reverse;
  wf_sequences->mode = wf_sequences_ascii;
  // Reserve room and locate the pattern/text regions
  wavefront_sequences_init_allocate(wf_sequences,pattern_length,text_length);
  // Decode the pattern, then the text, each terminated by its own sentinel
  wavefront_sequences_init_decode2bits(
      wf_sequences->pattern_buffer,pattern,pattern_length,
      WF_SEQUENCES_PADDING,WF_SEQUENCES_PATTERN_EOS,reverse);
  wf_sequences->pattern_buffer_length = pattern_length;
  wavefront_sequences_init_decode2bits(
      wf_sequences->text_buffer,text,text_length,
      WF_SEQUENCES_PADDING,WF_SEQUENCES_TEXT_EOS,reverse);
  wf_sequences->text_buffer_length = text_length;
  // Pattern view: the whole pattern
  wf_sequences->pattern_begin = 0;
  wf_sequences->pattern_length = pattern_length;
  wf_sequences->pattern = wf_sequences->pattern_buffer;
  wf_sequences->pattern_eos = wf_sequences->pattern[pattern_length];
  // Text view: the whole text
  wf_sequences->text_begin = 0;
  wf_sequences->text_length = text_length;
  wf_sequences->text = wf_sequences->text_buffer;
  wf_sequences->text_eos = wf_sequences->text[text_length];
}
/*
 * Compare one pattern position against one text position.
 *
 * Stored sequences: the two characters of the current views are compared
 * directly. Lambda mode: positions at or past the end of either sequence never
 * match; otherwise the user callback decides, receiving positions translated
 * by the current begin offsets (and mirrored when the container is reversed).
 *
 *   @param wf_sequences  Container holding the sequences or the callback
 *   @param pattern_pos   Position within the current pattern view
 *   @param text_pos      Position within the current text view
 *   @return              true if the two positions match
 */
bool wavefront_sequences_cmp(
    wavefront_sequences_t* const wf_sequences,
    const int pattern_pos,
    const int text_pos) {
  // Stored sequences: direct character comparison
  if (wf_sequences->mode != wf_sequences_lambda) {
    return wf_sequences->pattern[pattern_pos] == wf_sequences->text[text_pos];
  }
  // Lambda mode: out-of-range positions never match
  const int pattern_length = wf_sequences->pattern_length;
  const int text_length = wf_sequences->text_length;
  if (pattern_pos >= pattern_length) return false;
  if (text_pos >= text_length) return false;
  // Translate view positions into the coordinates given to the callback
  const int pattern_begin = wf_sequences->pattern_begin;
  const int text_begin = wf_sequences->text_begin;
  int pattern_coord, text_coord;
  if (wf_sequences->reverse) {
    // Mirror around the last position of each view
    const int pattern_last = pattern_begin + pattern_length - 1;
    const int text_last = text_begin + text_length - 1;
    pattern_coord = pattern_last - pattern_pos;
    text_coord = text_last - text_pos;
  } else {
    pattern_coord = pattern_begin + pattern_pos;
    text_coord = text_begin + text_pos;
  }
  return wf_sequences->match_funct(
      pattern_coord,text_coord,wf_sequences->match_funct_arguments);
}
/*
 * Fetch the pattern character at a position of the current view.
 * In lambda mode no characters are stored, so '-' is returned instead.
 *
 *   @param wf_sequences  Container holding the pattern
 *   @param position      Position within the current pattern view
 *   @return              The character at that position, or '-' in lambda mode
 */
char wavefront_sequences_get_pattern(
    wavefront_sequences_t* const wf_sequences,
    const int position) {
  const bool is_lambda = (wf_sequences->mode == wf_sequences_lambda);
  return is_lambda ? '-' : wf_sequences->pattern[position];
}
/*
 * Fetch the text character at a position of the current view.
 * In lambda mode no characters are stored, so '-' is returned instead.
 *
 *   @param wf_sequences  Container holding the text
 *   @param position      Position within the current text view
 *   @return              The character at that position, or '-' in lambda mode
 */
char wavefront_sequences_get_text(
    wavefront_sequences_t* const wf_sequences,
    const int position) {
  const bool is_lambda = (wf_sequences->mode == wf_sequences_lambda);
  return is_lambda ? '-' : wf_sequences->text[position];
}
/*
 * Restrict the working views to the ranges [pattern_begin,pattern_end) and
 * [text_begin,text_end).
 *
 * For stored sequences the sentinels are moved in place: the characters that
 * the previous sentinels replaced are put back, the view pointers are moved,
 * the characters at the new end positions are saved, and the sentinels are
 * written there. When the container is reversed, the view starts at
 * (buffer_length - end) inside the stored buffer.
 * In lambda mode only the offsets and lengths are updated.
 *
 *   @param wf_sequences   Container to update
 *   @param pattern_begin  First pattern position of the new view
 *   @param pattern_end    One past the last pattern position of the new view
 *   @param text_begin     First text position of the new view
 *   @param text_end       One past the last text position of the new view
 */
void wavefront_sequences_set_bounds(
    wavefront_sequences_t* const wf_sequences,
    const int pattern_begin,
    const int pattern_end,
    const int text_begin,
    const int text_end) {
  // Extent of the new views
  const int pattern_span = pattern_end - pattern_begin;
  const int text_span = text_end - text_begin;
  // Stored sequences: relocate views and sentinels
  if (wf_sequences->mode != wf_sequences_lambda) {
    // Put back the characters hidden by the previous sentinels
    const int pattern_span_prev = wf_sequences->pattern_length;
    const int text_span_prev = wf_sequences->text_length;
    wf_sequences->pattern[pattern_span_prev] = wf_sequences->pattern_eos;
    wf_sequences->text[text_span_prev] = wf_sequences->text_eos;
    // Move the views to the start of the requested ranges
    if (!wf_sequences->reverse) {
      wf_sequences->pattern = wf_sequences->pattern_buffer + pattern_begin;
      wf_sequences->text = wf_sequences->text_buffer + text_begin;
    } else {
      wf_sequences->pattern = wf_sequences->pattern_buffer + (wf_sequences->pattern_buffer_length - pattern_end);
      wf_sequences->text = wf_sequences->text_buffer + (wf_sequences->text_buffer_length - text_end);
    }
    // Save the characters about to be overwritten ...
    wf_sequences->pattern_eos = wf_sequences->pattern[pattern_span];
    wf_sequences->text_eos = wf_sequences->text[text_span];
    // ... and drop the sentinels at the new ends
    wf_sequences->pattern[pattern_span] = WF_SEQUENCES_PATTERN_EOS;
    wf_sequences->text[text_span] = WF_SEQUENCES_TEXT_EOS;
  }
  // Record the new bounds (all modes)
  wf_sequences->pattern_begin = pattern_begin;
  wf_sequences->pattern_length = pattern_span;
  wf_sequences->text_begin = text_begin;
  wf_sequences->text_length = text_span;
}