1use std::cmp::max;
39
40use crate::utils::TextSlice;
41
42use crate::alignment::pairwise::{MatchFunc, Scoring};
43
44use petgraph::graph::NodeIndex;
45use petgraph::visit::Topo;
46
47use petgraph::{Directed, Graph, Incoming};
48
49pub const MIN_SCORE: i32 = -858_993_459; pub type POAGraph = Graph<u8, i32, Directed, usize>;
51
52#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug, Serialize, Deserialize)]
57pub enum AlignmentOperation {
58 Match(Option<(usize, usize)>),
59 Del(Option<(usize, usize)>),
60 Ins(Option<usize>),
61 Xclip(usize),
62 Yclip(usize, usize), }
64
65#[derive(Default, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug, Serialize, Deserialize)]
66pub struct Alignment {
67 pub score: i32,
68 operations: Vec<AlignmentOperation>,
70}
71
72impl Alignment {
73 pub fn pretty(
95 &self,
96 consensus: TextSlice,
97 sequences: Vec<TextSlice>,
98 graph: &POAGraph,
99 ncol: usize,
100 ) -> String {
101 let mut seq_indices: Vec<usize> = vec![0; sequences.len()];
103 let mut seq_pretty: Vec<Vec<u8>> = vec![vec![]; sequences.len()];
104 let mut con_index: usize = 0;
105 let mut con_pretty: Vec<u8> = vec![];
106 let mut topo = Topo::new(graph);
107 while let Some(node) = topo.next(graph) {
109 let topo_base = graph.raw_nodes()[node.index()].weight;
110 let mut all_null = true;
111 for current_seq in 0..sequences.len() {
113 if seq_indices[current_seq] >= sequences[current_seq].len() {
114 seq_pretty[current_seq].push(b'-');
115 } else if sequences[current_seq][seq_indices[current_seq]] == topo_base {
116 seq_pretty[current_seq].push(topo_base);
117 seq_indices[current_seq] += 1;
118 all_null = false;
119 } else {
120 seq_pretty[current_seq].push(b'-');
121 }
122 }
123 if con_index >= consensus.len() {
125 con_pretty.push(b'-');
126 } else if consensus[con_index] == topo_base {
127 con_pretty.push(topo_base);
128 con_index += 1;
129 all_null = false;
130 } else {
131 con_pretty.push(b'-');
132 }
133 if all_null {
134 for current_seq in seq_pretty.iter_mut() {
135 current_seq.pop();
136 }
137 con_pretty.pop();
138 }
139 }
140 let mut s = String::new();
141 let mut idx = 0;
142 use std::cmp::min;
143
144 let ml = con_pretty.len();
145
146 while idx < ml {
147 let rng = idx..min(idx + ncol, ml);
148 s.push_str(&format!(
149 "cons:\t{}\n",
150 &String::from_utf8_lossy(&con_pretty[rng.clone()])
151 ));
152 for (seq_index, seq) in seq_pretty.iter().enumerate() {
153 s.push_str(&format!(
154 "seq{}:\t{}\n",
155 seq_index + 1,
156 &String::from_utf8_lossy(&seq[rng.clone()])
157 ));
158 }
159 s.push('\n');
160 idx += ncol;
161 }
162 s
163 }
164}
165
166#[derive(Default, Clone, Eq, PartialEq, Ord, PartialOrd, Debug)]
167pub struct Traceback {
168 rows: usize,
169 cols: usize,
170 best_in_last_row: usize,
172 best_in_last_col: usize,
173 best_overall: (usize, usize),
174 last: NodeIndex<usize>,
177 start_end_vec: Vec<(usize, usize)>,
179 matrix: Vec<Vec<i32>>,
181}
182
183impl Traceback {
184 fn with_capacity(m: usize, n: usize) -> Self {
191 let matrix: Vec<Vec<i32>> = vec![vec![]; m + 1];
193 let start_end_vec = vec![(0, n + 1); m + 1];
194 Traceback {
195 rows: m,
196 cols: n,
197 best_in_last_row: 0,
198 best_in_last_col: 0,
199 best_overall: (0, 0),
200 last: NodeIndex::new(0),
201 start_end_vec: start_end_vec,
202 matrix,
203 }
204 }
205 fn initialize_scores(&mut self, gap_open: i32, yclip_prefix: i32) {
207 for j in 0..=self.cols {
208 self.matrix[0].push(max((j as i32) * gap_open, yclip_prefix));
209 }
210 self.matrix[0][0] = 0;
211 }
212
213 fn new() -> Self {
214 Traceback {
215 rows: 0,
216 cols: 0,
217 best_in_last_row: 0,
218 best_in_last_col: 0,
219 best_overall: (0, 0),
220 last: NodeIndex::new(0),
221 start_end_vec: Vec::new(),
222 matrix: Vec::new(),
223 }
224 }
225
226 fn new_row(
228 &mut self,
229 row: usize,
230 size: usize,
231 gap_open: i32,
232 xclip_prefix: i32,
233 start: usize,
234 end: usize,
235 ) {
236 self.start_end_vec[row] = (start, end);
237 if start == 0 {
239 self.matrix[row].push(max((row as i32) * gap_open, xclip_prefix));
240 } else {
241 self.matrix[row].push(MIN_SCORE);
242 }
243 for _ in 1..=size {
244 self.matrix[row].push(MIN_SCORE);
245 }
246 }
247
248 fn set(&mut self, i: usize, j: usize, cell: i32) {
249 if !(self.start_end_vec[i].0 > j || self.start_end_vec[i].1 < j) {
251 let real_position = j - self.start_end_vec[i].0;
252 self.matrix[i][real_position] = cell;
253 }
254 }
255
256 fn get(&self, i: usize, j: usize) -> i32 {
257 if !(self.start_end_vec[i].0 > j
259 || self.start_end_vec[i].1 <= j
260 || self.matrix[i].is_empty())
261 {
262 let real_position = j - self.start_end_vec[i].0;
263 self.matrix[i][real_position]
264 }
265 else {
267 MIN_SCORE
268 }
269 }
270}
271
272#[derive(Default, Clone, Debug)]
276pub struct Aligner<F: MatchFunc> {
277 traceback: Traceback,
278 query: Vec<u8>,
279 poa: Poa<F>,
280}
281
282impl<F: MatchFunc> Aligner<F> {
283 pub fn new(scoring: Scoring<F>, reference: TextSlice) -> Self {
285 Aligner {
286 traceback: Traceback::new(),
287 query: reference.to_vec(),
288 poa: Poa::from_string(scoring, reference),
289 }
290 }
291
292 pub fn add_to_graph(&mut self) -> &mut Self {
294 let alignment = self.poa.recalculate_alignment(&self.traceback);
295 self.poa.add_alignment(&alignment, &self.query);
296 self
297 }
298
299 pub fn alignment(&self) -> Alignment {
301 self.poa.recalculate_alignment(&self.traceback)
302 }
303
304 pub fn add_alignment(&mut self, alignment: &Alignment) -> &mut Self {
306 self.poa.add_alignment(&alignment, &self.query);
307 self
308 }
309
310 pub fn global(&mut self, query: TextSlice) -> &mut Self {
312 let clip_penalties = [
314 self.poa.scoring.xclip_prefix,
315 self.poa.scoring.xclip_suffix,
316 self.poa.scoring.yclip_prefix,
317 self.poa.scoring.yclip_suffix,
318 ];
319
320 self.poa.scoring.xclip_prefix = MIN_SCORE;
322 self.poa.scoring.xclip_suffix = MIN_SCORE;
323 self.poa.scoring.yclip_prefix = MIN_SCORE;
324 self.poa.scoring.yclip_suffix = MIN_SCORE;
325
326 self.query = query.to_vec();
327 self.traceback = self.poa.custom(query);
328
329 self.poa.scoring.xclip_prefix = clip_penalties[0];
331 self.poa.scoring.xclip_suffix = clip_penalties[1];
332 self.poa.scoring.yclip_prefix = clip_penalties[2];
333 self.poa.scoring.yclip_suffix = clip_penalties[3];
334
335 self
336 }
337
338 pub fn semiglobal(&mut self, query: TextSlice) -> &mut Self {
340 let clip_penalties = [
342 self.poa.scoring.xclip_prefix,
343 self.poa.scoring.xclip_suffix,
344 self.poa.scoring.yclip_prefix,
345 self.poa.scoring.yclip_suffix,
346 ];
347
348 self.poa.scoring.xclip_prefix = MIN_SCORE;
350 self.poa.scoring.xclip_suffix = MIN_SCORE;
351 self.poa.scoring.yclip_prefix = 0;
352 self.poa.scoring.yclip_suffix = 0;
353
354 self.query = query.to_vec();
355 self.traceback = self.poa.custom(query);
356
357 self.poa.scoring.xclip_prefix = clip_penalties[0];
359 self.poa.scoring.xclip_suffix = clip_penalties[1];
360 self.poa.scoring.yclip_prefix = clip_penalties[2];
361 self.poa.scoring.yclip_suffix = clip_penalties[3];
362
363 self
364 }
365
366 pub fn local(&mut self, query: TextSlice) -> &mut Self {
368 let clip_penalties = [
370 self.poa.scoring.xclip_prefix,
371 self.poa.scoring.xclip_suffix,
372 self.poa.scoring.yclip_prefix,
373 self.poa.scoring.yclip_suffix,
374 ];
375
376 self.poa.scoring.xclip_prefix = 0;
378 self.poa.scoring.xclip_suffix = 0;
379 self.poa.scoring.yclip_prefix = 0;
380 self.poa.scoring.yclip_suffix = 0;
381
382 self.query = query.to_vec();
383 self.traceback = self.poa.custom(query);
384
385 self.poa.scoring.xclip_prefix = clip_penalties[0];
387 self.poa.scoring.xclip_suffix = clip_penalties[1];
388 self.poa.scoring.yclip_prefix = clip_penalties[2];
389 self.poa.scoring.yclip_suffix = clip_penalties[3];
390
391 self
392 }
393
394 pub fn custom(&mut self, query: TextSlice) -> &mut Self {
396 self.query = query.to_vec();
397 self.traceback = self.poa.custom(query);
398 self
399 }
400
401 pub fn global_banded(&mut self, query: TextSlice, bandwidth: usize) -> &mut Self {
404 self.query = query.to_vec();
405 self.traceback = self.poa.global_banded(query, bandwidth);
406 self
407 }
408
409 pub fn graph(&self) -> &POAGraph {
411 &self.poa.graph
412 }
413 pub fn consensus(&self) -> Vec<u8> {
415 let mut consensus: Vec<u8> = vec![];
416 let max_index = self.poa.graph.node_count();
417 let mut weight_score_next_vec: Vec<(i32, i32, usize)> = vec![(0, 0, 0); max_index + 1];
418 let mut topo = Topo::new(&self.poa.graph);
419 while let Some(node) = topo.next(&self.poa.graph) {
421 let mut best_weight_score_next: (i32, i32, usize) = (0, 0, usize::MAX);
422 let neighbour_nodes = self.poa.graph.neighbors_directed(node, Incoming);
423 for neighbour_node in neighbour_nodes {
425 let neighbour_index = neighbour_node.index();
426 let neighbour_score = weight_score_next_vec[neighbour_index].1;
427 let edges = self.poa.graph.edges_connecting(neighbour_node, node);
428 let weight = edges.map(|edge| edge.weight()).sum();
429 let current_node_score = weight + neighbour_score;
430 if (weight, current_node_score, neighbour_index) > best_weight_score_next {
432 best_weight_score_next = (weight, current_node_score, neighbour_index);
433 }
434 }
435 weight_score_next_vec[node.index()] = best_weight_score_next;
436 }
437 let mut pos = weight_score_next_vec
439 .iter()
440 .enumerate()
441 .max_by_key(|(_, &value)| value.1)
442 .map(|(idx, _)| idx)
443 .unwrap();
444 while pos != usize::MAX {
446 consensus.push(self.poa.graph.raw_nodes()[pos].weight);
447 pos = weight_score_next_vec[pos].2;
448 }
449 consensus.reverse();
450 consensus
451 }
452}
453
454#[derive(Default, Clone, Debug)]
459pub struct Poa<F: MatchFunc> {
460 scoring: Scoring<F>,
461 pub graph: POAGraph,
462}
463
464impl<F: MatchFunc> Poa<F> {
465 pub fn new(scoring: Scoring<F>, graph: POAGraph) -> Self {
472 Poa { scoring, graph }
473 }
474
475 pub fn from_string(scoring: Scoring<F>, seq: TextSlice) -> Self {
482 let mut graph: Graph<u8, i32, Directed, usize> =
483 Graph::with_capacity(seq.len(), seq.len() - 1);
484 let mut prev: NodeIndex<usize> = graph.add_node(seq[0]);
485 let mut node: NodeIndex<usize>;
486 for base in seq.iter().skip(1) {
487 node = graph.add_node(*base);
488 graph.add_edge(prev, node, 1);
489 prev = node;
490 }
491
492 Poa { scoring, graph }
493 }
494 pub fn custom(&self, query: TextSlice) -> Traceback {
499 assert!(self.graph.node_count() != 0);
500 let (m, n) = (self.graph.node_count(), query.len());
502 let mut traceback = Traceback::with_capacity(m, n);
504 traceback.initialize_scores(self.scoring.gap_open, self.scoring.yclip_prefix);
505 let mut topo = Topo::new(&self.graph);
507 let mut max_score_last_column = i32::MIN; let mut max_score_overall = 0; while let Some(node) = topo.next(&self.graph) {
511 let mut max_score_last_row = i32::MIN; let r = self.graph.raw_nodes()[node.index()].weight; let i = node.index() + 1; traceback.last = node;
515 let prevs: Vec<NodeIndex<usize>> =
517 self.graph.neighbors_directed(node, Incoming).collect();
518 traceback.new_row(
519 i,
520 n + 1,
521 self.scoring.gap_open,
522 self.scoring.xclip_prefix,
523 0,
524 n + 1,
525 );
526 let y_clip_min = traceback.get(i, 0) + self.scoring.yclip_prefix;
528 for (query_index, query_base) in query.iter().enumerate() {
530 let j = query_index + 1; let max_cell = if prevs.is_empty() {
532 traceback.get(0, j - 1) + self.scoring.match_fn.score(r, *query_base)
533 } else {
534 let x_clip_min = traceback.get(0, j) + self.scoring.xclip_prefix;
536 let mut max_cell = max(MIN_SCORE, max(x_clip_min, y_clip_min));
537 for prev_node in &prevs {
538 let i_p: usize = prev_node.index() + 1; max_cell = max(
540 max_cell,
541 max(
542 traceback.get(i_p, j - 1)
543 + self.scoring.match_fn.score(r, *query_base),
544 traceback.get(i_p, j) + self.scoring.gap_open,
545 ),
546 );
547 }
548 max_cell
549 };
550 let score = max(max_cell, traceback.get(i, j - 1) + self.scoring.gap_open);
551 if score > max_score_last_row {
552 max_score_last_row = score;
553 traceback.best_in_last_row = j;
554 }
555 if (score > max_score_last_column) && (query_index == query.len() - 1) {
556 max_score_last_column = score;
557 traceback.best_in_last_col = i;
558 }
559 if score > max_score_overall {
560 max_score_overall = score;
561 traceback.best_overall = (i, j);
562 }
563 traceback.set(i, j, score);
564 }
565 }
566 traceback
567 }
568 pub fn global_banded(&self, query: TextSlice, bandwidth: usize) -> Traceback {
574 assert!(self.graph.node_count() != 0);
575
576 let (m, n) = (self.graph.node_count(), query.len());
578 let mut traceback = Traceback::with_capacity(m, n);
579 traceback.initialize_scores(self.scoring.gap_open, self.scoring.yclip_prefix);
580
581 traceback.set(0, 0, 0);
582
583 let mut max_scoring_j = 0;
588 let mut max_score_for_row = MIN_SCORE;
589 let mut topo = Topo::new(&self.graph);
590 while let Some(node) = topo.next(&self.graph) {
591 let r = self.graph.raw_nodes()[node.index()].weight; let i = node.index() + 1; traceback.last = node;
595 let prevs: Vec<NodeIndex<usize>> =
597 self.graph.neighbors_directed(node, Incoming).collect();
598 let start = if bandwidth > max_scoring_j {
599 0
600 } else {
601 max_scoring_j - bandwidth
602 };
603 let end = max_scoring_j + bandwidth;
604 traceback.new_row(
605 i,
606 (end - start) + 1,
607 self.scoring.gap_open,
608 self.scoring.xclip_prefix,
609 start,
610 end + 1,
611 );
612 for (query_index, query_base) in query.iter().enumerate().skip(start) {
613 let j = query_index + 1; if j > end {
615 break;
616 }
617 let max_cell = if prevs.is_empty() {
618 traceback.get(0, j - 1) + self.scoring.match_fn.score(r, *query_base)
619 } else {
620 let mut max_cell = MIN_SCORE;
621 for prev_node in &prevs {
622 let i_p: usize = prev_node.index() + 1; max_cell = max(
624 max_cell,
625 max(
626 traceback.get(i_p, j - 1)
627 + self.scoring.match_fn.score(r, *query_base),
628 traceback.get(i_p, j) + self.scoring.gap_open,
629 ),
630 );
631 }
632 max_cell
633 };
634
635 let score = max(max_cell, traceback.get(i, j - 1) + self.scoring.gap_open);
636 if score > max_score_for_row {
637 max_scoring_j = j;
638 max_score_for_row = score;
639 }
640 traceback.set(i, j, score);
641 }
642 }
643 traceback
644 }
645 pub fn recalculate_alignment(&self, traceback: &Traceback) -> Alignment {
653 let mut ops: Vec<AlignmentOperation> = vec![];
655 let last_node = traceback.last.index() + 1;
657 let last_query = traceback.cols;
658 let final_score = traceback.get(last_node, last_query);
659
660 let mut curr_node = traceback.last.index() + 1;
661 let mut curr_query = traceback.cols;
662 let xy_score = traceback.get(traceback.best_overall.0, traceback.best_overall.1)
663 + self.scoring.xclip_suffix
664 + self.scoring.yclip_suffix;
665 let y_score =
666 traceback.get(last_node, traceback.best_in_last_row) + self.scoring.yclip_suffix;
667 let x_score =
668 traceback.get(traceback.best_in_last_col, last_query) + self.scoring.xclip_suffix;
669 if (xy_score >= final_score)
671 && (xy_score >= x_score)
672 && (xy_score >= y_score)
673 && (traceback.best_overall.1 != last_query)
674 && (traceback.best_overall.0 != last_node)
675 {
676 ops.push(AlignmentOperation::Xclip(traceback.best_overall.0));
677 ops.push(AlignmentOperation::Yclip(
678 traceback.best_overall.1,
679 last_query,
680 ));
681 curr_node = traceback.best_overall.0;
682 curr_query = traceback.best_overall.1;
683 }
684 else if (y_score >= final_score)
686 && (y_score >= x_score)
687 && (traceback.best_in_last_row != last_query)
688 {
689 ops.push(AlignmentOperation::Yclip(
690 traceback.best_in_last_row,
691 last_query,
692 ));
693 curr_query = traceback.best_in_last_row;
694 }
695 else if (x_score >= final_score) && (traceback.best_in_last_col != last_node) {
697 ops.push(AlignmentOperation::Xclip(traceback.best_in_last_col));
698 curr_node = traceback.best_in_last_col;
699 }
700 loop {
701 let mut current_alignment_operation = AlignmentOperation::Match(None);
702 let current_cell_score = traceback.get(curr_node, curr_query);
703 let mut next_jump = curr_query;
704 let mut next_node = 1;
705 let prevs: Vec<NodeIndex<usize>> = self
707 .graph
708 .neighbors_directed(NodeIndex::new(curr_node - 1), Incoming)
709 .collect();
710 let mut jump_up_score = MIN_SCORE;
711 let mut jump_diagonal_score = MIN_SCORE;
712 let jump_left_score = traceback.get(curr_node, curr_query - 1) + self.scoring.gap_open;
713 if current_cell_score == jump_left_score {
714 current_alignment_operation = AlignmentOperation::Ins(Some(curr_node - 1));
715 next_node = curr_node;
716 next_jump = curr_query - 1;
717 } else {
718 for prev in &prevs {
719 let prev_node = prev.index() + 1;
720 let diagonal_score = traceback.get(prev_node, curr_query - 1);
721 let top_score = traceback.get(prev_node, curr_query);
722 if current_cell_score == top_score + self.scoring.gap_open {
724 jump_up_score =
725 traceback.get(prev_node, curr_query) + self.scoring.gap_open;
726 current_alignment_operation = AlignmentOperation::Del(None);
727 next_jump = curr_query;
728 next_node = prev_node;
729 }
730 else if current_cell_score
732 == diagonal_score + self.scoring.match_fn.score(0, 1)
733 {
734 jump_diagonal_score = traceback.get(prev_node, curr_query - 1)
735 + self.scoring.match_fn.score(0, 1);
736 current_alignment_operation =
737 AlignmentOperation::Match(Some((prev_node - 1, curr_node - 1)));
738 next_node = prev_node;
739 next_jump = curr_query - 1;
740 }
741 else if current_cell_score
743 == diagonal_score + self.scoring.match_fn.score(0, 0)
744 {
745 jump_diagonal_score = traceback.get(prev_node, curr_query - 1)
746 + self.scoring.match_fn.score(0, 0);
747 current_alignment_operation =
748 AlignmentOperation::Match(Some((prev_node - 1, curr_node - 1)));
749 next_node = prev_node;
750 next_jump = curr_query - 1;
751 }
752 }
753 if prevs.len() == 0 {
755 if current_cell_score
757 == traceback.get(0, curr_query - 1) + self.scoring.match_fn.score(0, 0)
758 {
759 current_alignment_operation = AlignmentOperation::Match(None);
760 jump_diagonal_score =
761 traceback.get(0, curr_query - 1) + self.scoring.match_fn.score(0, 0);
762 next_node = 1;
763 next_jump = curr_query - 1;
764 }
765 if current_cell_score
767 == traceback.get(0, curr_query - 1) + self.scoring.match_fn.score(0, 1)
768 {
769 current_alignment_operation = AlignmentOperation::Match(None);
770 jump_diagonal_score =
771 traceback.get(0, curr_query - 1) + self.scoring.match_fn.score(0, 1);
772 next_node = 1;
773 next_jump = curr_query - 1;
774 }
775 }
776 }
777 let max_score = max(jump_diagonal_score, max(jump_up_score, jump_left_score));
779 if self.scoring.xclip_prefix >= max_score {
781 next_node = 0;
782 current_alignment_operation = AlignmentOperation::Xclip(0);
783 }
784 if self.scoring.yclip_prefix >= max(max_score, self.scoring.xclip_prefix) {
786 next_jump = 0;
787 current_alignment_operation = AlignmentOperation::Yclip(0, curr_query);
788 }
789 ops.push(current_alignment_operation);
790 curr_query = next_jump;
792 curr_node = next_node;
793 if prevs.len() == 0 || curr_query == 0 {
795 if prevs.len() == 0 {
797 if curr_query > 0 {
798 for _ in 0..curr_query {
799 if self.scoring.yclip_prefix > MIN_SCORE {
800 ops.push(AlignmentOperation::Yclip(0, curr_query));
801 break;
802 } else {
803 ops.push(AlignmentOperation::Ins(None));
804 }
805 }
806 }
807 } else {
808 if self.scoring.xclip_prefix > MIN_SCORE {
810 ops.push(AlignmentOperation::Xclip(0));
811 } else {
812 ops.push(AlignmentOperation::Del(None));
813 }
814 }
815 break;
816 }
817 }
818 ops.reverse();
819 Alignment {
820 score: final_score as i32,
821 operations: ops,
822 }
823 }
824 pub fn edges(&self, aln: Alignment) -> Vec<usize> {
829 let mut path: Vec<usize> = vec![];
830 let mut prev: NodeIndex<usize> = NodeIndex::new(0);
831 let mut _i: usize = 0;
832 for op in aln.operations {
833 match op {
834 AlignmentOperation::Match(None) => {
835 _i += 1;
836 }
837 AlignmentOperation::Match(Some((_, p))) => {
838 let node = NodeIndex::new(p);
839 let edge = self.graph.find_edge(prev, node).unwrap();
840 path.push(edge.index());
841 prev = NodeIndex::new(p);
842 _i += 1;
843 }
844 AlignmentOperation::Ins(None) => {}
845 AlignmentOperation::Ins(Some(_)) => {}
846 AlignmentOperation::Del(_) => {}
847 AlignmentOperation::Xclip(_) => {}
848 AlignmentOperation::Yclip(_, _) => {}
849 }
850 }
851 path
852 }
853
854 pub fn add_alignment(&mut self, aln: &Alignment, seq: TextSlice) {
861 let head = Topo::new(&self.graph).next(&self.graph).unwrap();
862 let mut prev: NodeIndex<usize> = NodeIndex::new(head.index());
863 let mut i: usize = 0;
864 let mut edge_not_connected: bool = false;
865 for op in &aln.operations {
866 match op {
867 AlignmentOperation::Match(None) => {
868 let node: NodeIndex<usize> = NodeIndex::new(head.index());
869 if (seq[i] != self.graph.raw_nodes()[head.index()].weight) && (seq[i] != b'X') {
870 let node = self.graph.add_node(seq[i]);
871 if edge_not_connected {
872 self.graph.add_edge(prev, node, 1);
873 }
874 edge_not_connected = false;
875 prev = node;
876 }
877 if edge_not_connected {
878 self.graph.add_edge(prev, node, 1);
879 prev = node;
880 edge_not_connected = false;
881 }
882 i += 1;
883 }
884 AlignmentOperation::Match(Some((_, p))) => {
885 let node = NodeIndex::new(*p);
886 if (seq[i] != self.graph.raw_nodes()[*p].weight) && (seq[i] != b'X') {
887 let node = self.graph.add_node(seq[i]);
888 self.graph.add_edge(prev, node, 1);
889 prev = node;
890 } else {
891 match self.graph.find_edge(prev, node) {
893 Some(edge) => {
894 *self.graph.edge_weight_mut(edge).unwrap() += 1;
895 }
896 None => {
897 if prev.index() != head.index() && prev.index() != node.index() {
898 self.graph.add_edge(prev, node, 1);
899 }
900 }
901 }
902 prev = NodeIndex::new(*p);
903 }
904 i += 1;
905 }
906 AlignmentOperation::Ins(None) => {
907 let node = self.graph.add_node(seq[i]);
908 if edge_not_connected {
909 self.graph.add_edge(prev, node, 1);
910 }
911 prev = node;
912 edge_not_connected = true;
913 i += 1;
914 }
915 AlignmentOperation::Ins(Some(_)) => {
916 let node = self.graph.add_node(seq[i]);
917 self.graph.add_edge(prev, node, 1);
918 prev = node;
919 i += 1;
920 }
921 AlignmentOperation::Del(_) => {} AlignmentOperation::Xclip(_) => {}
923 AlignmentOperation::Yclip(_, r) => {
924 i = *r;
925 }
926 }
927 }
928 }
929}
930
931#[cfg(test)]
932mod tests {
933 use super::*;
934 use crate::alignment::pairwise::Scoring;
935 use petgraph::dot::Dot;
936 use petgraph::graph::NodeIndex;
937
938 #[test]
939 fn test_init_graph() {
940 let scoring = Scoring::new(-1, 0, |a: u8, b: u8| if a == b { 1i32 } else { -1i32 });
943 let poa = Poa::from_string(scoring, b"123456789");
944 assert!(poa.graph.is_directed());
945 assert_eq!(poa.graph.node_count(), 9);
946 assert_eq!(poa.graph.edge_count(), 8);
947 }
948
949 #[test]
950 fn test_alignment() {
951 let scoring = Scoring::new(-1, 0, |a: u8, b: u8| if a == b { 1i32 } else { -1i32 });
952 let poa = Poa::from_string(scoring, b"GATTACA");
956 let traceback = poa.custom(b"GCATGCU");
957 let alignment = poa.recalculate_alignment(&traceback);
958 assert_eq!(alignment.score, 0);
959
960 let traceback = poa.custom(b"GCATGCUx");
961 let alignment = poa.recalculate_alignment(&traceback);
962 assert_eq!(alignment.score, -1);
963
964 let traceback = poa.custom(b"xCATGCU");
965 let alignment = poa.recalculate_alignment(&traceback);
966 assert_eq!(alignment.score, -2);
967 }
968
969 #[test]
970 fn test_branched_alignment() {
971 let scoring = Scoring::new(-1, 0, |a: u8, b: u8| if a == b { 1i32 } else { -1i32 });
972 let seq1 = b"TTTTT";
973 let seq2 = b"TTATT";
974 let mut poa = Poa::from_string(scoring, seq1);
975 let head: NodeIndex<usize> = NodeIndex::new(1);
976 let tail: NodeIndex<usize> = NodeIndex::new(2);
977 let node1 = poa.graph.add_node(b'A');
978 let node2 = poa.graph.add_node(b'A');
979 poa.graph.add_edge(head, node1, 1);
980 poa.graph.add_edge(node1, node2, 1);
981 poa.graph.add_edge(node2, tail, 1);
982 let traceback = poa.custom(seq2);
983 let alignment = poa.recalculate_alignment(&traceback);
984 assert_eq!(alignment.score, 3);
985 }
986
987 #[test]
988 fn test_alt_branched_alignment() {
989 let scoring = Scoring::new(-1, 0, |a: u8, b: u8| if a == b { 1i32 } else { -1i32 });
990
991 let seq1 = b"TTCCTTAA";
992 let seq2 = b"TTTTGGAA";
993 let mut poa = Poa::from_string(scoring, seq1);
994 let head: NodeIndex<usize> = NodeIndex::new(1);
995 let tail: NodeIndex<usize> = NodeIndex::new(2);
996 let node1 = poa.graph.add_node(b'A');
997 let node2 = poa.graph.add_node(b'A');
998 poa.graph.add_edge(head, node1, 1);
999 poa.graph.add_edge(node1, node2, 1);
1000 poa.graph.add_edge(node2, tail, 1);
1001 let traceback = poa.custom(seq2);
1002 let alignment = poa.recalculate_alignment(&traceback);
1003 poa.add_alignment(&alignment, seq2);
1004 assert_eq!(poa.graph.edge_count(), 14);
1005 assert!(poa
1006 .graph
1007 .contains_edge(NodeIndex::new(5), NodeIndex::new(10)));
1008 assert!(poa
1009 .graph
1010 .contains_edge(NodeIndex::new(11), NodeIndex::new(6)));
1011 }
1012
1013 #[test]
1014 fn test_insertion_on_branch() {
1015 let scoring = Scoring::new(-1, 0, |a: u8, b: u8| if a == b { 1i32 } else { -1i32 });
1016
1017 let seq1 = b"TTCCGGTTTAA";
1018 let seq2 = b"TTGGTATGGGAA";
1019 let seq3 = b"TTGGTTTGCGAA";
1020 let mut poa = Poa::from_string(scoring, seq1);
1021 let head: NodeIndex<usize> = NodeIndex::new(1);
1022 let tail: NodeIndex<usize> = NodeIndex::new(2);
1023 let node1 = poa.graph.add_node(b'C');
1024 let node2 = poa.graph.add_node(b'C');
1025 let node3 = poa.graph.add_node(b'C');
1026 poa.graph.add_edge(head, node1, 1);
1027 poa.graph.add_edge(node1, node2, 1);
1028 poa.graph.add_edge(node2, node3, 1);
1029 poa.graph.add_edge(node3, tail, 1);
1030 let traceback = poa.custom(seq2);
1031 let alignment = poa.recalculate_alignment(&traceback);
1032 assert_eq!(alignment.score, 2);
1033 poa.add_alignment(&alignment, seq2);
1034 let traceback = poa.custom(seq3);
1035 let alignment2 = poa.recalculate_alignment(&traceback);
1036
1037 assert_eq!(alignment2.score, 10);
1038 }
1039
1040 #[test]
1041 fn test_poa_method_chaining() {
1042 let scoring = Scoring::new(-1, 0, |a: u8, b: u8| if a == b { 1i32 } else { -1i32 });
1043 let mut aligner = Aligner::new(scoring, b"TTCCGGTTTAA");
1044 aligner
1045 .global(b"TTGGTATGGGAA")
1046 .add_to_graph()
1047 .global(b"TTGGTTTGCGAA");
1048 assert_eq!(aligner.alignment().score, 10);
1049 }
1050
1051 #[test]
1052 fn test_global_banded() {
1053 let s1 = b"TGGCATGCTCAAGGACCGTTGAATACTATCTTAATGGACCGCAAGCTCCCTGAAGGTGGGCCACATTCGAGGGCC\
1056 CGGCCTCCACCTATTCCCAACGAAACTAGCATTAACATGGACAGGGGCGCATAAAACAGAGTTTCTCCTAATCCCCTTTCCCCTG\
1057 GAGTGCTAGTCAGAACCGCACATGTTGACGCTTTGGTCAGGTGTAGCCGATTCACTACCCGGGGTAGTACGAGTGGTAGCACCAT\
1058 GGTTAGCTTCTCCGGGATGTTCCGCGAAGAGAGCGGAGCGGGCGTGCACAAGCTCGGACAACCCTAGTGTGCATCAAATGCCATA\
1059 TGTTCTGCTTTGTCTGTGACTCACGCCCACGTTTGACATCACTCTTACTATCCAACGGGCCAAGCTTAGGAGGGGCGGACCTATT\
1060 GAACCATTAGAGGGGATCCTTCTGAAGTTAAGGCACAGCGTTGAGGGGCTATAGTCGATCCTCTTAGTAAATATAATGGACAGGT\
1061 CTTTACGACACAGTATGAATTAGTCCAATGGAGCCATTGTAATCGATGAAACTGTTATATCTGTTGGCCTAGTCGCAACGGTCTA\
1062 CATCGCTAGCGTAACGGTTAAGACCTCTTCCACGAGTGGGACACTCATAAAGCTCGCGGCCCTTACGATCTAGGGGAGCGCACTC\
1063 CGTAGTCAATCACGGCCAGCCGGTGTGCGCTAAGTTACGAAACAGTCACGAGCGATGAACCGTATGAAGAATGGACCCTTCTAAG\
1064 ATGTGAACACCTAGATGAGCAGCAAGACAATTGCTCTCGCCGACTCGTTCGAAAGTGTACCTCGAGAGCACAACACGCATTACCC\
1065 AGGTGACCGTGTATTGACTGCCCGTTACTCAGAAACCTTACAGTATTAATCGCCTAGTCTGTATAGTATTCATTCTGCCCGTGAC
1066 ATGCGGGAAGCCTGCTGAGATTGGCAGCGTCTTTGGAGGGTTACCAAGCGAGGACACGGGCAAATTGAGGTGT";
1067 let s2 = b"TGGCTACATGCTCAAGCATCGTTGAAGCTCATCTTAATGGACCGCAACGGCCGCCTGAAGGTGGGACACGTGACG\
1068 GGCGGGGGCCCGGCCTTAACCCATTCTCAAGCAACTAGCATACTGGACAGCGGCGCATATACAGAGAATCGCCTAAACCCACTTT\
1069 TGCCCTGAGTGCTAGTCAGTCCCCCACATCTGACACTTCGGTGGCGCACGTTTAGCAGCTTACACTACCCGGGGCAGTACGAGTG\
1070 CTAGCACGGTAGCCTCCGGAGGGCTGCGAGGAATAGAACGGAGAGGGCGTCCTCAAGCCGGACAACCCTAGTGTGCATCAAATGA\
1071 TGCCTGCTGATTTTCTGTGCATTTCACGCCCAATTCACAATCACTCCTACTATCCAACGGGCAAGCATAAGGGAGGGGGGGAGTA\
1072 CGTCTATTGCACCATTAGAGGGGTACTTCGAATTCGTTGAACTGAGATAGAGTCGATCCTCTTTGTATATAAACGCAGGTACTTT\
1073 GCTATAAGGTGAATTATTCAAATGGAGCCATTGTAATCGATGACAATGTTATACCTTTAGGCCTAGATCAACGGTCTCCATCGCA\
1074 AGCGTAACGATTATGACCCAACGAGTGGACACTCATAGAGCGGCCCTTACGAGCTAGGCGAGCGCAATCCGTGTGAATCACAGCC\
1075 AGACGGGATGTTGCGTTAAGCTACGAAACATCACGCGGTGAGCGTATGAATAATGGACCCGTCAAAATGTGGGCAGCGAGCAGCA\
1076 GGACAATTGCTCGGGTCCGGTAGCGACTCGTTCGAAACTGTAAACGTCGAGGCACAACACGCATTAGCCAGGTGAACATGTATTG\
1077 ACCGCCCCGTAGATACCTTACAGTATTAATCGCCTAGTCTGTATGATCTTCGTTCTGCCTGTGAACATGAGGGAAGCCTGCTTGA\
1078 GTTTGGCAGCGTCTTTGGGGTTTCCAAGCGAGCGACACGGGCAAATTGAGGTGT";
1079 let s3 = b"TGGCATGCTCAAGGAGTGCTGAAGCTCATTTTAATGGACCGCAACGGCCGCCTGAAGGTGGGGCACGTGACGGGC\
1080 GAGGGCCCGGCCTTAACCCATTCTCAAGAAACTCGTATACTGGACAGCGGCGCATATAGAGAGATTCTCCTAAACCCTCTTTTGC\
1081 CCTGACATGTGCTAGTCAGTCGCCCACATCTGAACACTTCGGCAGCGCACGTCTAGCAGCTTACACTACCGGGCGGGGCAGGTAC\
1082 GAGTGCTAGCACGGTAGCCTCTCCCGGAGTGCTGGGAATAGAAGGGAGAGGGCGTCCTCATGCCGGCGACCCTAGTGTGCATCAA\
1083 ATGAGATGCCTGCTGTGATTTTCACATTCACAATCACTCTTACCATCCCAACGGGACAAGCATAAGGGAGGGGGGGAGCTATTGA\
1084 ACCAAGAGGGGTCCTCCGGAATTCGTTGAGCTGCGATAGAGTCGATCCTCTTTGTATATAAACGCAGGTACTTTGCGATTAGGTG\
1085 AAGTATTCAAATGGAGCCATTGTAATCGATGACAATGTGATGCCTTTAGGCCTAGATCACGGTCTACATCGCGTAAGCGTAACGA\
1086 TTATGACCCAACGAGAGGCACACTCATAAAGCGCGGCCCTTACTAGCTAGGCGAGCTTAGCAATCCGTGCAATCACACCCAGACG\
1087 GGTTGAGCTAAGCTACGGAACACCACGCGATGAGCCGTATGAAGAATGGACCCGTCGAAAATGTGGACAGCGAGCATCAGGACAA\
1088 TTGCTCGGGTCCGCGACTCGTGCGGAACTGTAAACGTCGAGGCACAACACGATTAGCCAGGTGAACATGTAGACCGCCCCGTAGA\
1089 TATTTTACAGTATTAATCGCCTAGTCTGTATAGGATCTTCGTTCTGCCTGTGAACATGCGGGAAGCCTGCTTGAGATTGGCAGCG\
1090 TCTTTGGGCAAGCGAGGACACGGGCAAATCGAGGTGG";
1091 let scoring = Scoring::from_scores(-2, -2, 2, -4);
1092 let mut aligner_banded = Aligner::new(scoring, s1);
1093 aligner_banded.global_banded(s2, 25).add_to_graph();
1094 aligner_banded.global_banded(s3, 25);
1095 let scoring = Scoring::from_scores(-2, -2, 2, -4);
1096 let mut aligner_unbanded = Aligner::new(scoring, s1);
1097 aligner_unbanded.global(s2).add_to_graph();
1098 aligner_unbanded.global(s3);
1099 let alignment_banded = aligner_banded.alignment();
1100 let alignment_unbanded = aligner_unbanded.alignment();
1101 for (i, operation) in alignment_banded.operations.iter().enumerate() {
1102 assert_eq!(*operation, alignment_unbanded.operations[i]);
1103 }
1104 }
1105
1106 #[test]
1107 fn test_edge_cases() {
1108 let scoring = Scoring::new(-1, 0, |a: u8, b: u8| if a == b { 1i32 } else { -1i32 });
1110 let mut aligner = Aligner::new(scoring, b"BBA");
1111 aligner.global(b"AAA").add_to_graph();
1112 let g = aligner.graph().map(|_, n| (*n) as char, |_, e| *e);
1113 let dot = format!("{:?}", Dot::new(&g));
1114 let output = "digraph {\n 0 [ label = \"'B'\" ]\n 1 [ label = \"'B'\" ]\n 2 [ label = \"'A'\" ]\
1115 \n 3 [ label = \"'A'\" ]\n 4 [ label = \"'A'\" ]\n 0 -> 1 [ label = \"1\" ]\n 1 -> 2 [ label = \"1\" ]\
1116 \n 3 -> 4 [ label = \"1\" ]\n 4 -> 2 [ label = \"1\" ]\n}\n".to_string();
1117 assert_eq!(dot, output);
1118 let scoring = Scoring::new(-1, 0, |a: u8, b: u8| if a == b { 1i32 } else { -1i32 });
1120 let mut aligner = Aligner::new(scoring, b"AAA");
1121 aligner.global(b"ABA").add_to_graph();
1122 let g = aligner.graph().map(|_, n| (*n) as char, |_, e| *e);
1123 let dot = format!("{:?}", Dot::new(&g));
1124 let output = "digraph {\n 0 [ label = \"'A'\" ]\n 1 [ label = \"'A'\" ]\n 2 [ label = \"'A'\" ]\
1125 \n 3 [ label = \"'B'\" ]\n 0 -> 1 [ label = \"1\" ]\n 1 -> 2 [ label = \"1\" ]\n 0 -> 3 [ label = \"1\" ]\
1126 \n 3 -> 2 [ label = \"1\" ]\n}\n".to_string();
1127 assert_eq!(dot, output);
1128 let scoring = Scoring::new(-1, 0, |a: u8, b: u8| if a == b { 1i32 } else { -1i32 });
1130 let mut aligner = Aligner::new(scoring, b"BBBBBAAA");
1131 aligner.global(b"AAA").add_to_graph();
1132 let g = aligner.graph().map(|_, n| (*n) as char, |_, e| *e);
1133 let dot = format!("{:?}", Dot::new(&g));
1134 let output = "digraph {\n 0 [ label = \"'B'\" ]\n 1 [ label = \"'B'\" ]\n 2 [ label = \"'B'\" ]\
1135 \n 3 [ label = \"'B'\" ]\n 4 [ label = \"'B'\" ]\n 5 [ label = \"'A'\" ]\n 6 [ label = \"'A'\" ]\
1136 \n 7 [ label = \"'A'\" ]\n 0 -> 1 [ label = \"1\" ]\n 1 -> 2 [ label = \"1\" ]\n 2 -> 3 [ label = \"1\" ]\
1137 \n 3 -> 4 [ label = \"1\" ]\n 4 -> 5 [ label = \"1\" ]\n 5 -> 6 [ label = \"2\" ]\n 6 -> 7 [ label = \"2\" ]\n}\n".to_string();
1138 assert_eq!(dot, output);
1139 let scoring = Scoring::new(-1, 0, |a: u8, b: u8| if a == b { 1i32 } else { -1i32 });
1141 let mut aligner = Aligner::new(scoring, b"AAA");
1142 aligner.global(b"BBBBBAAA").add_to_graph();
1143 let g = aligner.graph().map(|_, n| (*n) as char, |_, e| *e);
1144 let dot = format!("{:?}", Dot::new(&g));
1145 let output = "digraph {\n 0 [ label = \"'A'\" ]\n 1 [ label = \"'A'\" ]\n 2 [ label = \"'A'\" ]\
1146 \n 3 [ label = \"'B'\" ]\n 4 [ label = \"'B'\" ]\n 5 [ label = \"'B'\" ]\n 6 [ label = \"'B'\" ]\
1147 \n 7 [ label = \"'B'\" ]\n 0 -> 1 [ label = \"2\" ]\n 1 -> 2 [ label = \"2\" ]\n 3 -> 4 [ label = \"1\" ]\
1148 \n 4 -> 5 [ label = \"1\" ]\n 5 -> 6 [ label = \"1\" ]\n 6 -> 7 [ label = \"1\" ]\n 7 -> 0 [ label = \"1\" ]\n}\n".to_string();
1149 assert_eq!(dot, output);
1150 }
1151
1152 #[test]
1153 fn test_consensus() {
1154 let scoring = Scoring::new(-1, 0, |a: u8, b: u8| if a == b { 1i32 } else { -1i32 });
1155 let mut aligner = Aligner::new(scoring, b"GCATGCUx");
1156 aligner.global(b"GCATGCU").add_to_graph();
1157 aligner.global(b"xCATGCU").add_to_graph();
1158 assert_eq!(aligner.consensus(), b"GCATGCUx");
1159 }
1160
1161 #[test]
1162 fn test_xclip_prefix_custom() {
1163 let x = b"GGGGGGATG";
1164 let y = b"ATG";
1165
1166 let score = |a: u8, b: u8| if a == b { 1i32 } else { -1i32 };
1167 let scoring = Scoring::new(-5, -1, &score).xclip(-5);
1168
1169 let mut aligner = Aligner::new(scoring, x);
1170 let alignment = aligner.custom(y).alignment();
1171
1172 assert_eq!(
1173 alignment.operations,
1174 [
1175 AlignmentOperation::Xclip(0),
1176 AlignmentOperation::Match(Some((5, 6))),
1177 AlignmentOperation::Match(Some((6, 7))),
1178 AlignmentOperation::Match(Some((7, 8)))
1179 ]
1180 );
1181 }
1182
1183 #[test]
1184 fn test_yclip_prefix_custom() {
1185 let y = b"GGGGGGATG";
1186 let x = b"ATG";
1187
1188 let score = |a: u8, b: u8| if a == b { 1i32 } else { -1i32 };
1189 let scoring = Scoring::new(-5, -1, &score).yclip(-5);
1190
1191 let mut aligner = Aligner::new(scoring, x);
1192 let alignment = aligner.custom(y).alignment();
1193
1194 assert_eq!(
1195 alignment.operations,
1196 [
1197 AlignmentOperation::Yclip(0, 6),
1198 AlignmentOperation::Match(None),
1199 AlignmentOperation::Match(Some((0, 1))),
1200 AlignmentOperation::Match(Some((1, 2)))
1201 ]
1202 );
1203 }
1204
1205 #[test]
1206 fn test_xclip_suffix_custom() {
1207 let x = b"GAAAA";
1208 let y = b"CG";
1209
1210 let score = |a: u8, b: u8| if a == b { 1i32 } else { -1i32 };
1211 let scoring = Scoring::new(-5, -1, &score).xclip(0).yclip(0);
1212
1213 let mut aligner = Aligner::new(scoring, x);
1214 let alignment = aligner.custom(y).alignment();
1215
1216 assert_eq!(
1217 alignment.operations,
1218 [
1219 AlignmentOperation::Yclip(0, 1),
1220 AlignmentOperation::Match(None),
1221 AlignmentOperation::Xclip(1)
1222 ]
1223 );
1224 }
1225
1226 #[test]
1227 fn test_yclip_suffix_custom() {
1228 let y = b"GAAAA";
1229 let x = b"CG";
1230
1231 let score = |a: u8, b: u8| if a == b { 3i32 } else { -3i32 };
1232 let scoring = Scoring::new(-5, -1, &score).yclip(-5).xclip(0);
1233
1234 let mut aligner = Aligner::new(scoring, x);
1235 let alignment = aligner.custom(y).alignment();
1236
1237 assert_eq!(
1238 alignment.operations,
1239 [
1240 AlignmentOperation::Xclip(0),
1241 AlignmentOperation::Match(Some((0, 1))),
1242 AlignmentOperation::Yclip(1, 5)
1243 ]
1244 );
1245 }
1246}