#include "utils/commons.h"
#include "system/mm_allocator.h"
#include "wavefront_unialign.h"
#include "wavefront.h"
#include "wavefront_attributes.h"
#include "wavefront_offset.h"
#include "wavefront_penalties.h"
#include "wavefront_plot.h"
#include "wavefront_slab.h"
#include "wavefront_components.h"
#include "wavefront_compute.h"
#include "wavefront_compute_affine.h"
#include "wavefront_compute_affine2p.h"
#include "wavefront_compute_edit.h"
#include "wavefront_compute_linear.h"
#include "wavefront_extend.h"
#include "wavefront_backtrace.h"
#include "wavefront_backtrace_buffer.h"
void wavefront_unialign_init(
wavefront_aligner_t* const wf_aligner,
const affine2p_matrix_type component_begin,
const affine2p_matrix_type component_end) {
wavefront_align_status_t* const align_status = &wf_aligner->align_status;
alignment_form_t* const alignment_form = &wf_aligner->alignment_form;
const bool end2end = (alignment_form->span == alignment_end2end);
switch (wf_aligner->penalties.distance_metric) {
case indel:
case edit:
align_status->wf_align_compute = &wavefront_compute_edit;
break;
case gap_linear:
align_status->wf_align_compute = &wavefront_compute_linear;
break;
case gap_affine:
align_status->wf_align_compute = &wavefront_compute_affine;
break;
case gap_affine_2p:
align_status->wf_align_compute = &wavefront_compute_affine2p;
break;
default:
fprintf(stderr,"[WFA] Distance function not implemented\n");
exit(1);
break;
}
if (end2end) {
align_status->wf_align_extend = &wavefront_extend_end2end;
} else {
align_status->wf_align_extend = &wavefront_extend_endsfree;
}
wf_aligner->component_begin = component_begin;
wf_aligner->component_end = component_end;
wavefront_aligner_init(wf_aligner,0);
cigar_clear(wf_aligner->cigar);
}
bool wavefront_unialign_reached_limits(
wavefront_aligner_t* const wf_aligner,
const int score) {
if (score >= wf_aligner->system.max_alignment_steps) {
wf_aligner->cigar->score = -wf_aligner->system.max_alignment_steps;
wf_aligner->align_status.status = WF_STATUS_MAX_STEPS_REACHED;
wf_aligner->align_status.score = score;
return true; }
alignment_system_t* const system = &wf_aligner->system;
if (score % system->probe_interval_global != 0) return false; if (system->verbose >= 3) {
wavefront_unialign_print_status(stderr,wf_aligner,score); }
wavefront_components_t* const wf_components = &wf_aligner->wf_components;
if (wf_components->bt_buffer!=NULL && (score%system->probe_interval_compact)==0) {
uint64_t bt_memory = wf_backtrace_buffer_get_size_used(wf_components->bt_buffer);
if (bt_memory > system->max_memory_compact) {
wavefront_components_compact_bt_buffer(wf_components,score,wf_aligner->system.verbose);
bt_memory = wf_backtrace_buffer_get_size_used(wf_components->bt_buffer);
uint64_t proposed_mem = (double)bt_memory * TELESCOPIC_FACTOR;
if (system->max_memory_compact < proposed_mem && proposed_mem < system->max_memory_abort) {
proposed_mem = system->max_memory_compact;
}
if (wf_components->bt_buffer->num_compactions >= system->max_partial_compacts) {
wf_backtrace_buffer_reset_compaction(wf_components->bt_buffer);
}
}
}
const uint64_t wf_memory_used = wavefront_aligner_get_size(wf_aligner);
if (wf_memory_used > system->max_memory_abort) {
wf_aligner->align_status.status = WF_STATUS_OOM;
wf_aligner->align_status.score = score;
return true; }
return false;
}
void wavefront_unialign_terminate(
wavefront_aligner_t* const wf_aligner,
const int score) {
wavefront_align_status_t* const align_status = &wf_aligner->align_status;
wavefront_sequences_t* const sequences = &wf_aligner->sequences;
const int pattern_length = sequences->pattern_length;
const int text_length = sequences->text_length;
cigar_t* const cigar = wf_aligner->cigar;
align_status->score = score;
if (wf_aligner->alignment_scope == compute_score) {
if (align_status->status == WF_STATUS_END_REACHED) {
cigar->end_v = pattern_length;
cigar->end_h = text_length;
cigar->score = wavefront_compute_classic_score(wf_aligner,pattern_length,text_length,score);
align_status->status = WF_STATUS_ALG_COMPLETED;
} else {
const int k = wf_aligner->alignment_end_pos.k;
const int offset = wf_aligner->alignment_end_pos.offset;
cigar->end_v = WAVEFRONT_V(k,offset);
cigar->end_h = WAVEFRONT_H(k,offset);
cigar->score = wavefront_compute_classic_score(wf_aligner,cigar->end_v,cigar->end_h,score);
align_status->dropped = true;
align_status->status = WF_STATUS_ALG_PARTIAL;
}
} else {
wavefront_components_t* const wf_components = &wf_aligner->wf_components;
const int alignment_end_k = wf_aligner->alignment_end_pos.k;
const wf_offset_t alignment_end_offset = wf_aligner->alignment_end_pos.offset;
if (alignment_end_offset != WAVEFRONT_OFFSET_NULL) {
if (wf_components->bt_piggyback) {
const bool memory_modular = wf_aligner->wf_components.memory_modular;
const int max_score_scope = wf_aligner->wf_components.max_score_scope;
const int score_mod = (memory_modular) ? score % max_score_scope : score;
wavefront_t* const mwavefront = wf_components->mwavefronts[score_mod];
wavefront_backtrace_pcigar(
wf_aligner,alignment_end_k,alignment_end_offset,
mwavefront->bt_pcigar[alignment_end_k],
mwavefront->bt_prev[alignment_end_k]);
} else {
if (wf_aligner->penalties.distance_metric <= gap_linear) {
wavefront_backtrace_linear(wf_aligner,
score,alignment_end_k,alignment_end_offset);
} else {
wavefront_backtrace_affine(wf_aligner,
wf_aligner->component_begin,wf_aligner->component_end,
score,alignment_end_k,alignment_end_offset);
}
}
}
const bool do_extension = wf_aligner->alignment_form.extension;
const bool unreachable = (align_status->status == WF_STATUS_END_UNREACHABLE);
align_status->dropped = unreachable;
if (do_extension || unreachable) {
const bool cigar_trimmed = wavefront_aligner_maxtrim_cigar(wf_aligner);
if (cigar_trimmed) {
align_status->status = WF_STATUS_ALG_PARTIAL;
} else {
align_status->status = (align_status->status == WF_STATUS_END_UNREACHABLE) ?
WF_STATUS_ALG_PARTIAL : WF_STATUS_ALG_COMPLETED;
}
} else {
const int k = wf_aligner->alignment_end_pos.k;
const int offset = wf_aligner->alignment_end_pos.offset;
cigar->end_v = WAVEFRONT_V(k,offset);
cigar->end_h = WAVEFRONT_H(k,offset);
cigar->score = wavefront_compute_classic_score(wf_aligner,cigar->end_v,cigar->end_h,score);
if (unreachable) {
align_status->status = WF_STATUS_ALG_PARTIAL;
} else {
align_status->status = WF_STATUS_ALG_COMPLETED;
}
}
}
}
#define WFA_UNIALIGN_DEBUG() wavefront_aligner_print(stderr,wf_aligner,0,score,7,0)
int wavefront_unialign(
wavefront_aligner_t* const wf_aligner) {
wavefront_align_status_t* const align_status = &wf_aligner->align_status;
void (*wf_align_compute)(wavefront_aligner_t* const,const int) = align_status->wf_align_compute;
int (*wf_align_extend)(wavefront_aligner_t* const,const int) = align_status->wf_align_extend;
int score = align_status->score;
while (true) {
const int finished = (*wf_align_extend)(wf_aligner,score);
if (finished) {
if (align_status->status == WF_STATUS_END_REACHED ||
align_status->status == WF_STATUS_END_UNREACHABLE) {
wavefront_unialign_terminate(wf_aligner,score);
}
return align_status->status;
}
++score;
(*wf_align_compute)(wf_aligner,score);
if (wavefront_unialign_reached_limits(wf_aligner,score)) return align_status->status;
if (wf_aligner->plot != NULL) wavefront_plot(wf_aligner,score,0);
}
return WF_STATUS_OK;
}
void wavefront_unialign_print_status(
FILE* const stream,
wavefront_aligner_t* const wf_aligner,
const int score) {
wavefront_components_t* const wf_components = &wf_aligner->wf_components;
wavefront_sequences_t* const sequences = &wf_aligner->sequences;
const int pattern_length = sequences->pattern_length;
const int text_length = sequences->text_length;
const int dist_total = MAX(text_length,pattern_length);
int s = (wf_components->memory_modular) ? score%wf_components->max_score_scope : score;
wavefront_t* wavefront = wf_components->mwavefronts[s];
if (wavefront==NULL && s>0) {
s = (wf_components->memory_modular) ? (score-1)%wf_components->max_score_scope : (score-1);
wavefront = wf_components->mwavefronts[s];
}
int dist_max = -1, wf_len = -1, k;
if (wavefront!=NULL) {
wf_offset_t* const offsets = wavefront->offsets;
for (k=wavefront->lo;k<=wavefront->hi;++k) {
const int dist = MAX(WAVEFRONT_V(k,offsets[k]),WAVEFRONT_H(k,offsets[k]));
dist_max = MAX(dist_max,dist);
}
wf_len = wavefront->hi-wavefront->lo+1;
}
const uint64_t slab_size = wavefront_slab_get_size(wf_aligner->wavefront_slab);
const uint64_t bt_buffer_used = (wf_components->bt_buffer) ?
wf_backtrace_buffer_get_size_used(wf_components->bt_buffer) : 0;
const float aligned_progress = (dist_max>=0) ? (100.0f*(float)dist_max/(float)dist_total) : -1.0f;
const float million_offsets = (wf_len>=0) ? (float)wf_len/1000000.0f : -1.0f;
fprintf(stream,"[");
wavefront_aligner_print_mode(stream,wf_aligner);
fprintf(stream,
"] SequenceLength=(%d,%d) Score %d (~ %2.3f%% aligned). "
"MemoryUsed(WF-Slab,BT-buffer)=(%lu MB,%lu MB). "
"Wavefronts ~ %2.3f Moffsets\n",
pattern_length,
text_length,
score,
aligned_progress,
CONVERT_B_TO_MB(slab_size),
CONVERT_B_TO_MB(bt_buffer_used),
million_offsets);
}