bio/alignment/
poa.rs

1// Copyright 2017-2025 Brett Bowman, Jeff Knaggs, Minindu Weerakoon
2// Licensed under the MIT license (http://opensource.org/licenses/MIT)
3// This file may not be copied, modified, or distributed
4// except according to those terms.
5
6//! Partial-Order Alignment for fast alignment and consensus of multiple homologous sequences.
7//!
8//! - time complexity: `O(N^2 * L^2)`, where `N` is the number of sequences and `L` is the length of each sequence.
9//!
10//! For the original concept and theory, see:
11//! * Lee, Christopher, Catherine Grasso, and Mark F. Sharlow. "Multiple sequence alignment using
12//! partial order graphs." Bioinformatics 18.3 (2002): 452-464.
13//! * Lee, Christopher. "Generating consensus sequences from partial order multiple sequence
14//! alignment graphs." Bioinformatics 19.8 (2003): 999-1008.
15//!
16//! For a modern reference implementation, see poapy:
17//! https://github.com/ljdursi/poapy
18//!
19//! # Example
20//!
21//! ```
22//! use bio::alignment::pairwise::Scoring;
23//! use bio::alignment::poa::*;
24//!
25//! let x = b"AAAAAAA";
26//! let y = b"AABBBAA";
27//! let z = b"AABCBAA";
28//!
29//! let scoring = Scoring::new(-1, 0, |a: u8, b: u8| if a == b { 1i32 } else { -1i32 });
30//! let mut aligner = Aligner::new(scoring, x);
31//! // z differs from x in 3 locations
32//! assert_eq!(aligner.global(z).alignment().score, 1);
33//! aligner.global(y).add_to_graph();
34//! // z differs from x and y's partial order alignment by 1 base
35//! assert_eq!(aligner.global(z).alignment().score, 5);
36//! ```
37
38use std::cmp::max;
39
40use crate::utils::TextSlice;
41
42use crate::alignment::pairwise::{MatchFunc, Scoring};
43
44use petgraph::graph::NodeIndex;
45use petgraph::visit::Topo;
46
47use petgraph::{Directed, Graph, Incoming};
48
49pub const MIN_SCORE: i32 = -858_993_459; // negative infinity; see alignment/pairwise/mod.rs
50pub type POAGraph = Graph<u8, i32, Directed, usize>;
51
52// Unlike with a total order we may have arbitrary successors in the
53// traceback matrix. I have not yet figured out what the best level of
54// detail to store is, so Match and Del operations remember In and Out
55// nodes on the reference graph.
56#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug, Serialize, Deserialize)]
57pub enum AlignmentOperation {
58    Match(Option<(usize, usize)>),
59    Del(Option<(usize, usize)>),
60    Ins(Option<usize>),
61    Xclip(usize),
62    Yclip(usize, usize), // to, from
63}
64
65#[derive(Default, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug, Serialize, Deserialize)]
66pub struct Alignment {
67    pub score: i32,
68    //    xstart: Edge,
69    operations: Vec<AlignmentOperation>,
70}
71
72impl Alignment {
73    /// Return the pretty formatted poa alignment as a String. The string
74    /// contains sets of (number of queries + 1) lines of length (ncol). First line is for the
75    /// consensus sequence, rest are for the queries. A '-' in the sequence
76    /// indicates a blank (insertion/deletion).
77    ///
78    /// # Example
79    ///
80    /// If we align the strings,
81    /// "ACCCCCTTTTTCCGG"
82    /// "ACTTCCCTTTTTCCGG"
83    /// "ACCGCCTTTTTCCGG"
84    /// "ACCCCCTGTTTCAAGG"
85    /// we will get the following output:
86    ///
87    /// ```c
88    /// cons:   ACC---CCCTT-TTTCC--GG
89    /// seq1:   ACC---CCCTT-TTTCC--GG
90    /// seq2:   AC--TTCCCTT-TTTCC--GG
91    /// seq3:   ACCG--CC-TT-TTTCC--GG
92    /// seq4:   ACC---CCCT-GTTTC-AAGG
93    /// ```
94    pub fn pretty(
95        &self,
96        consensus: TextSlice,
97        sequences: Vec<TextSlice>,
98        graph: &POAGraph,
99        ncol: usize,
100    ) -> String {
101        // initialize required variables
102        let mut seq_indices: Vec<usize> = vec![0; sequences.len()];
103        let mut seq_pretty: Vec<Vec<u8>> = vec![vec![]; sequences.len()];
104        let mut con_index: usize = 0;
105        let mut con_pretty: Vec<u8> = vec![];
106        let mut topo = Topo::new(graph);
107        // go through the nodes topologically
108        while let Some(node) = topo.next(graph) {
109            let topo_base = graph.raw_nodes()[node.index()].weight;
110            let mut all_null = true;
111            // go through the sequences checking if bases match with the current node base
112            for current_seq in 0..sequences.len() {
113                if seq_indices[current_seq] >= sequences[current_seq].len() {
114                    seq_pretty[current_seq].push(b'-');
115                } else if sequences[current_seq][seq_indices[current_seq]] == topo_base {
116                    seq_pretty[current_seq].push(topo_base);
117                    seq_indices[current_seq] += 1;
118                    all_null = false;
119                } else {
120                    seq_pretty[current_seq].push(b'-');
121                }
122            }
123            // do the same for the consensus
124            if con_index >= consensus.len() {
125                con_pretty.push(b'-');
126            } else if consensus[con_index] == topo_base {
127                con_pretty.push(topo_base);
128                con_index += 1;
129                all_null = false;
130            } else {
131                con_pretty.push(b'-');
132            }
133            if all_null {
134                for current_seq in seq_pretty.iter_mut() {
135                    current_seq.pop();
136                }
137                con_pretty.pop();
138            }
139        }
140        let mut s = String::new();
141        let mut idx = 0;
142        use std::cmp::min;
143
144        let ml = con_pretty.len();
145
146        while idx < ml {
147            let rng = idx..min(idx + ncol, ml);
148            s.push_str(&format!(
149                "cons:\t{}\n",
150                &String::from_utf8_lossy(&con_pretty[rng.clone()])
151            ));
152            for (seq_index, seq) in seq_pretty.iter().enumerate() {
153                s.push_str(&format!(
154                    "seq{}:\t{}\n",
155                    seq_index + 1,
156                    &String::from_utf8_lossy(&seq[rng.clone()])
157                ));
158            }
159            s.push('\n');
160            idx += ncol;
161        }
162        s
163    }
164}
165
166#[derive(Default, Clone, Eq, PartialEq, Ord, PartialOrd, Debug)]
167pub struct Traceback {
168    rows: usize,
169    cols: usize,
170    // store these for xclipping and yclipping
171    best_in_last_row: usize,
172    best_in_last_col: usize,
173    best_overall: (usize, usize),
174    // store the last visited node in topological order so that
175    // we can index into the end of the alignment when we backtrack
176    last: NodeIndex<usize>,
177    // start and end of each row in matrix for banded poa
178    start_end_vec: Vec<(usize, usize)>,
179    // score matrix
180    matrix: Vec<Vec<i32>>,
181}
182
183impl Traceback {
184    /// Create a Traceback matrix with given maximum sizes
185    ///
186    /// # Arguments
187    ///
188    /// * `m` - the number of nodes in the DAG
189    /// * `n` - the length of the query sequence
190    fn with_capacity(m: usize, n: usize) -> Self {
191        // each row of matrix contain start end position and vec of traceback cells
192        let matrix: Vec<Vec<i32>> = vec![vec![]; m + 1];
193        let start_end_vec = vec![(0, n + 1); m + 1];
194        Traceback {
195            rows: m,
196            cols: n,
197            best_in_last_row: 0,
198            best_in_last_col: 0,
199            best_overall: (0, 0),
200            last: NodeIndex::new(0),
201            start_end_vec: start_end_vec,
202            matrix,
203        }
204    }
205    /// Populate the first row of the traceback matrix
206    fn initialize_scores(&mut self, gap_open: i32, yclip_prefix: i32) {
207        for j in 0..=self.cols {
208            self.matrix[0].push(max((j as i32) * gap_open, yclip_prefix));
209        }
210        self.matrix[0][0] = 0;
211    }
212
213    fn new() -> Self {
214        Traceback {
215            rows: 0,
216            cols: 0,
217            best_in_last_row: 0,
218            best_in_last_col: 0,
219            best_overall: (0, 0),
220            last: NodeIndex::new(0),
221            start_end_vec: Vec::new(),
222            matrix: Vec::new(),
223        }
224    }
225
226    // create a new row according to the parameters
227    fn new_row(
228        &mut self,
229        row: usize,
230        size: usize,
231        gap_open: i32,
232        xclip_prefix: i32,
233        start: usize,
234        end: usize,
235    ) {
236        self.start_end_vec[row] = (start, end);
237        // when the row starts from the edge
238        if start == 0 {
239            self.matrix[row].push(max((row as i32) * gap_open, xclip_prefix));
240        } else {
241            self.matrix[row].push(MIN_SCORE);
242        }
243        for _ in 1..=size {
244            self.matrix[row].push(MIN_SCORE);
245        }
246    }
247
248    fn set(&mut self, i: usize, j: usize, cell: i32) {
249        // set the matrix cell if in band range
250        if !(self.start_end_vec[i].0 > j || self.start_end_vec[i].1 < j) {
251            let real_position = j - self.start_end_vec[i].0;
252            self.matrix[i][real_position] = cell;
253        }
254    }
255
256    fn get(&self, i: usize, j: usize) -> i32 {
257        // get the matrix cell if in band range else return the appropriate values
258        if !(self.start_end_vec[i].0 > j
259            || self.start_end_vec[i].1 <= j
260            || self.matrix[i].is_empty())
261        {
262            let real_position = j - self.start_end_vec[i].0;
263            self.matrix[i][real_position]
264        }
265        // out of band
266        else {
267            MIN_SCORE
268        }
269    }
270}
271
272/// A partially ordered aligner builder
273///
274/// Uses consuming builder pattern for constructing partial order alignments with method chaining
275#[derive(Default, Clone, Debug)]
276pub struct Aligner<F: MatchFunc> {
277    traceback: Traceback,
278    query: Vec<u8>,
279    poa: Poa<F>,
280}
281
282impl<F: MatchFunc> Aligner<F> {
283    /// Create new instance.
284    pub fn new(scoring: Scoring<F>, reference: TextSlice) -> Self {
285        Aligner {
286            traceback: Traceback::new(),
287            query: reference.to_vec(),
288            poa: Poa::from_string(scoring, reference),
289        }
290    }
291
292    /// Get the alignment of the last query to the graph and add to graph.
293    pub fn add_to_graph(&mut self) -> &mut Self {
294        let alignment = self.poa.recalculate_alignment(&self.traceback);
295        self.poa.add_alignment(&alignment, &self.query);
296        self
297    }
298
299    /// Return alignment of last added query against the graph.
300    pub fn alignment(&self) -> Alignment {
301        self.poa.recalculate_alignment(&self.traceback)
302    }
303
304    /// Add the alignment to the graph
305    pub fn add_alignment(&mut self, alignment: &Alignment) -> &mut Self {
306        self.poa.add_alignment(&alignment, &self.query);
307        self
308    }
309
310    /// Globally align a given query against the graph.
311    pub fn global(&mut self, query: TextSlice) -> &mut Self {
312        // Store the current clip penalties
313        let clip_penalties = [
314            self.poa.scoring.xclip_prefix,
315            self.poa.scoring.xclip_suffix,
316            self.poa.scoring.yclip_prefix,
317            self.poa.scoring.yclip_suffix,
318        ];
319
320        // Temporarily Over-write the clip penalties
321        self.poa.scoring.xclip_prefix = MIN_SCORE;
322        self.poa.scoring.xclip_suffix = MIN_SCORE;
323        self.poa.scoring.yclip_prefix = MIN_SCORE;
324        self.poa.scoring.yclip_suffix = MIN_SCORE;
325
326        self.query = query.to_vec();
327        self.traceback = self.poa.custom(query);
328
329        // Set the clip penalties to the original values
330        self.poa.scoring.xclip_prefix = clip_penalties[0];
331        self.poa.scoring.xclip_suffix = clip_penalties[1];
332        self.poa.scoring.yclip_prefix = clip_penalties[2];
333        self.poa.scoring.yclip_suffix = clip_penalties[3];
334
335        self
336    }
337
338    /// Semi-globally align a given query against the graph.
339    pub fn semiglobal(&mut self, query: TextSlice) -> &mut Self {
340        // Store the current clip penalties
341        let clip_penalties = [
342            self.poa.scoring.xclip_prefix,
343            self.poa.scoring.xclip_suffix,
344            self.poa.scoring.yclip_prefix,
345            self.poa.scoring.yclip_suffix,
346        ];
347
348        // Temporarily Over-write the clip penalties
349        self.poa.scoring.xclip_prefix = MIN_SCORE;
350        self.poa.scoring.xclip_suffix = MIN_SCORE;
351        self.poa.scoring.yclip_prefix = 0;
352        self.poa.scoring.yclip_suffix = 0;
353
354        self.query = query.to_vec();
355        self.traceback = self.poa.custom(query);
356
357        // Set the clip penalties to the original values
358        self.poa.scoring.xclip_prefix = clip_penalties[0];
359        self.poa.scoring.xclip_suffix = clip_penalties[1];
360        self.poa.scoring.yclip_prefix = clip_penalties[2];
361        self.poa.scoring.yclip_suffix = clip_penalties[3];
362
363        self
364    }
365
366    /// Locally align a given query against the graph.
367    pub fn local(&mut self, query: TextSlice) -> &mut Self {
368        // Store the current clip penalties
369        let clip_penalties = [
370            self.poa.scoring.xclip_prefix,
371            self.poa.scoring.xclip_suffix,
372            self.poa.scoring.yclip_prefix,
373            self.poa.scoring.yclip_suffix,
374        ];
375
376        // Temporarily Over-write the clip penalties
377        self.poa.scoring.xclip_prefix = 0;
378        self.poa.scoring.xclip_suffix = 0;
379        self.poa.scoring.yclip_prefix = 0;
380        self.poa.scoring.yclip_suffix = 0;
381
382        self.query = query.to_vec();
383        self.traceback = self.poa.custom(query);
384
385        // Set the clip penalties to the original values
386        self.poa.scoring.xclip_prefix = clip_penalties[0];
387        self.poa.scoring.xclip_suffix = clip_penalties[1];
388        self.poa.scoring.yclip_prefix = clip_penalties[2];
389        self.poa.scoring.yclip_suffix = clip_penalties[3];
390
391        self
392    }
393
394    /// Custom align a given query against the graph with custom xclip and yclip penalties.
395    pub fn custom(&mut self, query: TextSlice) -> &mut Self {
396        self.query = query.to_vec();
397        self.traceback = self.poa.custom(query);
398        self
399    }
400
401    /// Globally align a given query against the graph with a band around the previous
402    /// optimal score for speed.
403    pub fn global_banded(&mut self, query: TextSlice, bandwidth: usize) -> &mut Self {
404        self.query = query.to_vec();
405        self.traceback = self.poa.global_banded(query, bandwidth);
406        self
407    }
408
409    /// Return alignment graph.
410    pub fn graph(&self) -> &POAGraph {
411        &self.poa.graph
412    }
413    /// Return the consensus sequence generated from the POA graph.
414    pub fn consensus(&self) -> Vec<u8> {
415        let mut consensus: Vec<u8> = vec![];
416        let max_index = self.poa.graph.node_count();
417        let mut weight_score_next_vec: Vec<(i32, i32, usize)> = vec![(0, 0, 0); max_index + 1];
418        let mut topo = Topo::new(&self.poa.graph);
419        // go through the nodes topologically
420        while let Some(node) = topo.next(&self.poa.graph) {
421            let mut best_weight_score_next: (i32, i32, usize) = (0, 0, usize::MAX);
422            let neighbour_nodes = self.poa.graph.neighbors_directed(node, Incoming);
423            // go through the incoming neighbour nodes
424            for neighbour_node in neighbour_nodes {
425                let neighbour_index = neighbour_node.index();
426                let neighbour_score = weight_score_next_vec[neighbour_index].1;
427                let edges = self.poa.graph.edges_connecting(neighbour_node, node);
428                let weight = edges.map(|edge| edge.weight()).sum();
429                let current_node_score = weight + neighbour_score;
430                // save the neighbour node with the highest weight and score as best
431                if (weight, current_node_score, neighbour_index) > best_weight_score_next {
432                    best_weight_score_next = (weight, current_node_score, neighbour_index);
433                }
434            }
435            weight_score_next_vec[node.index()] = best_weight_score_next;
436        }
437        // get the index of the max scored node (end of consensus)
438        let mut pos = weight_score_next_vec
439            .iter()
440            .enumerate()
441            .max_by_key(|(_, &value)| value.1)
442            .map(|(idx, _)| idx)
443            .unwrap();
444        // go through weight_score_next_vec appending to the consensus
445        while pos != usize::MAX {
446            consensus.push(self.poa.graph.raw_nodes()[pos].weight);
447            pos = weight_score_next_vec[pos].2;
448        }
449        consensus.reverse();
450        consensus
451    }
452}
453
454/// A partially ordered alignment graph
455///
456/// A directed acyclic graph datastructure that represents the topology of a
457/// traceback matrix.
458#[derive(Default, Clone, Debug)]
459pub struct Poa<F: MatchFunc> {
460    scoring: Scoring<F>,
461    pub graph: POAGraph,
462}
463
464impl<F: MatchFunc> Poa<F> {
465    /// Create a new aligner instance from the directed acyclic graph of another.
466    ///
467    /// # Arguments
468    ///
469    /// * `scoring` - the score struct
470    /// * `poa` - the partially ordered reference alignment
471    pub fn new(scoring: Scoring<F>, graph: POAGraph) -> Self {
472        Poa { scoring, graph }
473    }
474
475    /// Create a new POA graph from an initial reference sequence and alignment penalties.
476    ///
477    /// # Arguments
478    ///
479    /// * `scoring` - the score struct
480    /// * `reference` - a reference TextSlice to populate the initial reference graph
481    pub fn from_string(scoring: Scoring<F>, seq: TextSlice) -> Self {
482        let mut graph: Graph<u8, i32, Directed, usize> =
483            Graph::with_capacity(seq.len(), seq.len() - 1);
484        let mut prev: NodeIndex<usize> = graph.add_node(seq[0]);
485        let mut node: NodeIndex<usize>;
486        for base in seq.iter().skip(1) {
487            node = graph.add_node(*base);
488            graph.add_edge(prev, node, 1);
489            prev = node;
490        }
491
492        Poa { scoring, graph }
493    }
494    /// A global Needleman-Wunsch aligner on partially ordered graphs.
495    ///
496    /// # Arguments
497    /// * `query` - the query TextSlice to align against the internal graph member
498    pub fn custom(&self, query: TextSlice) -> Traceback {
499        assert!(self.graph.node_count() != 0);
500        // dimensions of the traceback matrix
501        let (m, n) = (self.graph.node_count(), query.len());
502        // save score location of the max scoring node for the query for suffix clipping
503        let mut traceback = Traceback::with_capacity(m, n);
504        traceback.initialize_scores(self.scoring.gap_open, self.scoring.yclip_prefix);
505        // construct the score matrix (O(n^2) space)
506        let mut topo = Topo::new(&self.graph);
507        // required for semi global/ local alignment
508        let mut max_score_last_column = i32::MIN; // score, row
509        let mut max_score_overall = 0; // score, row, column
510        while let Some(node) = topo.next(&self.graph) {
511            let mut max_score_last_row = i32::MIN; // score, column
512            let r = self.graph.raw_nodes()[node.index()].weight; // reference base at previous index
513            let i = node.index() + 1; // 0 index is for initialization so we start at 1
514            traceback.last = node;
515            // iterate over the predecessors of this node
516            let prevs: Vec<NodeIndex<usize>> =
517                self.graph.neighbors_directed(node, Incoming).collect();
518            traceback.new_row(
519                i,
520                n + 1,
521                self.scoring.gap_open,
522                self.scoring.xclip_prefix,
523                0,
524                n + 1,
525            );
526            // get y clip min
527            let y_clip_min = traceback.get(i, 0) + self.scoring.yclip_prefix;
528            // query base and its index in the DAG (traceback matrix rows)
529            for (query_index, query_base) in query.iter().enumerate() {
530                let j = query_index + 1; // 0 index is initialized so we start at 1
531                let max_cell = if prevs.is_empty() {
532                    traceback.get(0, j - 1) + self.scoring.match_fn.score(r, *query_base)
533                } else {
534                    // get x clip min
535                    let x_clip_min = traceback.get(0, j) + self.scoring.xclip_prefix;
536                    let mut max_cell = max(MIN_SCORE, max(x_clip_min, y_clip_min));
537                    for prev_node in &prevs {
538                        let i_p: usize = prev_node.index() + 1; // index of previous node
539                        max_cell = max(
540                            max_cell,
541                            max(
542                                traceback.get(i_p, j - 1)
543                                    + self.scoring.match_fn.score(r, *query_base),
544                                traceback.get(i_p, j) + self.scoring.gap_open,
545                            ),
546                        );
547                    }
548                    max_cell
549                };
550                let score = max(max_cell, traceback.get(i, j - 1) + self.scoring.gap_open);
551                if score > max_score_last_row {
552                    max_score_last_row = score;
553                    traceback.best_in_last_row = j;
554                }
555                if (score > max_score_last_column) && (query_index == query.len() - 1) {
556                    max_score_last_column = score;
557                    traceback.best_in_last_col = i;
558                }
559                if score > max_score_overall {
560                    max_score_overall = score;
561                    traceback.best_overall = (i, j);
562                }
563                traceback.set(i, j, score);
564            }
565        }
566        traceback
567    }
568    /// A global Needleman-Wunsch aligner on partially ordered graphs with banding.
569    ///
570    /// # Arguments
571    /// * `query` - the query TextSlice to align against the internal graph member
572    /// * `bandwidth` - width of band, if too small, alignment may be suboptimal
573    pub fn global_banded(&self, query: TextSlice, bandwidth: usize) -> Traceback {
574        assert!(self.graph.node_count() != 0);
575
576        // dimensions of the traceback matrix
577        let (m, n) = (self.graph.node_count(), query.len());
578        let mut traceback = Traceback::with_capacity(m, n);
579        traceback.initialize_scores(self.scoring.gap_open, self.scoring.yclip_prefix);
580
581        traceback.set(0, 0, 0);
582
583        // construct the score matrix (O(n^2) space)
584        // but this sucks, we want linear time!!!
585        // at each row i we want to find the max scoring j
586        // and band
587        let mut max_scoring_j = 0;
588        let mut max_score_for_row = MIN_SCORE;
589        let mut topo = Topo::new(&self.graph);
590        while let Some(node) = topo.next(&self.graph) {
591            // reference base and index
592            let r = self.graph.raw_nodes()[node.index()].weight; // reference base at previous index
593            let i = node.index() + 1; // 0 index is for initialization so we start at 1
594            traceback.last = node;
595            // iterate over the predecessors of this node
596            let prevs: Vec<NodeIndex<usize>> =
597                self.graph.neighbors_directed(node, Incoming).collect();
598            let start = if bandwidth > max_scoring_j {
599                0
600            } else {
601                max_scoring_j - bandwidth
602            };
603            let end = max_scoring_j + bandwidth;
604            traceback.new_row(
605                i,
606                (end - start) + 1,
607                self.scoring.gap_open,
608                self.scoring.xclip_prefix,
609                start,
610                end + 1,
611            );
612            for (query_index, query_base) in query.iter().enumerate().skip(start) {
613                let j = query_index + 1; // 0 index is initialized so we start at 1
614                if j > end {
615                    break;
616                }
617                let max_cell = if prevs.is_empty() {
618                    traceback.get(0, j - 1) + self.scoring.match_fn.score(r, *query_base)
619                } else {
620                    let mut max_cell = MIN_SCORE;
621                    for prev_node in &prevs {
622                        let i_p: usize = prev_node.index() + 1; // index of previous node
623                        max_cell = max(
624                            max_cell,
625                            max(
626                                traceback.get(i_p, j - 1)
627                                    + self.scoring.match_fn.score(r, *query_base),
628                                traceback.get(i_p, j) + self.scoring.gap_open,
629                            ),
630                        );
631                    }
632                    max_cell
633                };
634
635                let score = max(max_cell, traceback.get(i, j - 1) + self.scoring.gap_open);
636                if score > max_score_for_row {
637                    max_scoring_j = j;
638                    max_score_for_row = score;
639                }
640                traceback.set(i, j, score);
641            }
642        }
643        traceback
644    }
645    /// Computes the alignment using the provided traceback structure.
646    /// This function requires the original graph used to generate the traceback score matrix.
647    ///
648    /// time complexity: `O(N * L)`, where `N` is the number of sequences and `L` is the length of each sequence.
649    ///
650    /// # Arguments
651    /// * `traceback` - the traceback struct
652    pub fn recalculate_alignment(&self, traceback: &Traceback) -> Alignment {
653        // Get the alignment by backtracking and recalculating stuff
654        let mut ops: Vec<AlignmentOperation> = vec![];
655        // loop until we reach a node with no incoming
656        let last_node = traceback.last.index() + 1;
657        let last_query = traceback.cols;
658        let final_score = traceback.get(last_node, last_query);
659
660        let mut curr_node = traceback.last.index() + 1;
661        let mut curr_query = traceback.cols;
662        let xy_score = traceback.get(traceback.best_overall.0, traceback.best_overall.1)
663            + self.scoring.xclip_suffix
664            + self.scoring.yclip_suffix;
665        let y_score =
666            traceback.get(last_node, traceback.best_in_last_row) + self.scoring.yclip_suffix;
667        let x_score =
668            traceback.get(traceback.best_in_last_col, last_query) + self.scoring.xclip_suffix;
669        // x and y suffix make sure not in last row and column
670        if (xy_score >= final_score)
671            && (xy_score >= x_score)
672            && (xy_score >= y_score)
673            && (traceback.best_overall.1 != last_query)
674            && (traceback.best_overall.0 != last_node)
675        {
676            ops.push(AlignmentOperation::Xclip(traceback.best_overall.0));
677            ops.push(AlignmentOperation::Yclip(
678                traceback.best_overall.1,
679                last_query,
680            ));
681            curr_node = traceback.best_overall.0;
682            curr_query = traceback.best_overall.1;
683        }
684        // y suffix
685        else if (y_score >= final_score)
686            && (y_score >= x_score)
687            && (traceback.best_in_last_row != last_query)
688        {
689            ops.push(AlignmentOperation::Yclip(
690                traceback.best_in_last_row,
691                last_query,
692            ));
693            curr_query = traceback.best_in_last_row;
694        }
695        // x suffix
696        else if (x_score >= final_score) && (traceback.best_in_last_col != last_node) {
697            ops.push(AlignmentOperation::Xclip(traceback.best_in_last_col));
698            curr_node = traceback.best_in_last_col;
699        }
700        loop {
701            let mut current_alignment_operation = AlignmentOperation::Match(None);
702            let current_cell_score = traceback.get(curr_node, curr_query);
703            let mut next_jump = curr_query;
704            let mut next_node = 1;
705            // check left if gap open difference with left
706            let prevs: Vec<NodeIndex<usize>> = self
707                .graph
708                .neighbors_directed(NodeIndex::new(curr_node - 1), Incoming)
709                .collect();
710            let mut jump_up_score = MIN_SCORE;
711            let mut jump_diagonal_score = MIN_SCORE;
712            let jump_left_score = traceback.get(curr_node, curr_query - 1) + self.scoring.gap_open;
713            if current_cell_score == jump_left_score {
714                current_alignment_operation = AlignmentOperation::Ins(Some(curr_node - 1));
715                next_node = curr_node;
716                next_jump = curr_query - 1;
717            } else {
718                for prev in &prevs {
719                    let prev_node = prev.index() + 1;
720                    let diagonal_score = traceback.get(prev_node, curr_query - 1);
721                    let top_score = traceback.get(prev_node, curr_query);
722                    // Top
723                    if current_cell_score == top_score + self.scoring.gap_open {
724                        jump_up_score =
725                            traceback.get(prev_node, curr_query) + self.scoring.gap_open;
726                        current_alignment_operation = AlignmentOperation::Del(None);
727                        next_jump = curr_query;
728                        next_node = prev_node;
729                    }
730                    // Diagonal mismatch
731                    else if current_cell_score
732                        == diagonal_score + self.scoring.match_fn.score(0, 1)
733                    {
734                        jump_diagonal_score = traceback.get(prev_node, curr_query - 1)
735                            + self.scoring.match_fn.score(0, 1);
736                        current_alignment_operation =
737                            AlignmentOperation::Match(Some((prev_node - 1, curr_node - 1)));
738                        next_node = prev_node;
739                        next_jump = curr_query - 1;
740                    }
741                    // Diagonal match
742                    else if current_cell_score
743                        == diagonal_score + self.scoring.match_fn.score(0, 0)
744                    {
745                        jump_diagonal_score = traceback.get(prev_node, curr_query - 1)
746                            + self.scoring.match_fn.score(0, 0);
747                        current_alignment_operation =
748                            AlignmentOperation::Match(Some((prev_node - 1, curr_node - 1)));
749                        next_node = prev_node;
750                        next_jump = curr_query - 1;
751                    }
752                }
753                // Last node to avoid Match(None) which should never occur
754                if prevs.len() == 0 {
755                    // match
756                    if current_cell_score
757                        == traceback.get(0, curr_query - 1) + self.scoring.match_fn.score(0, 0)
758                    {
759                        current_alignment_operation = AlignmentOperation::Match(None);
760                        jump_diagonal_score =
761                            traceback.get(0, curr_query - 1) + self.scoring.match_fn.score(0, 0);
762                        next_node = 1;
763                        next_jump = curr_query - 1;
764                    }
765                    // mismatch
766                    if current_cell_score
767                        == traceback.get(0, curr_query - 1) + self.scoring.match_fn.score(0, 1)
768                    {
769                        current_alignment_operation = AlignmentOperation::Match(None);
770                        jump_diagonal_score =
771                            traceback.get(0, curr_query - 1) + self.scoring.match_fn.score(0, 1);
772                        next_node = 1;
773                        next_jump = curr_query - 1;
774                    }
775                }
776            }
777            // max score of the three
778            let max_score = max(jump_diagonal_score, max(jump_up_score, jump_left_score));
779            // x clip
780            if self.scoring.xclip_prefix >= max_score {
781                next_node = 0;
782                current_alignment_operation = AlignmentOperation::Xclip(0);
783            }
784            // y clip
785            if self.scoring.yclip_prefix >= max(max_score, self.scoring.xclip_prefix) {
786                next_jump = 0;
787                current_alignment_operation = AlignmentOperation::Yclip(0, curr_query);
788            }
789            ops.push(current_alignment_operation);
790            // iterate to next
791            curr_query = next_jump;
792            curr_node = next_node;
793            // break point
794            if prevs.len() == 0 || curr_query == 0 {
795                //if at end but not at start of query add bunch of ins(None)
796                if prevs.len() == 0 {
797                    if curr_query > 0 {
798                        for _ in 0..curr_query {
799                            if self.scoring.yclip_prefix > MIN_SCORE {
800                                ops.push(AlignmentOperation::Yclip(0, curr_query));
801                                break;
802                            } else {
803                                ops.push(AlignmentOperation::Ins(None));
804                            }
805                        }
806                    }
807                } else {
808                    //if at start of query but prev nodes available
809                    if self.scoring.xclip_prefix > MIN_SCORE {
810                        ops.push(AlignmentOperation::Xclip(0));
811                    } else {
812                        ops.push(AlignmentOperation::Del(None));
813                    }
814                }
815                break;
816            }
817        }
818        ops.reverse();
819        Alignment {
820            score: final_score as i32,
821            operations: ops,
822        }
823    }
824    /// Experimental: return sequence of traversed edges
825    ///
826    /// Only supports alignments for sequences that have already been added,
827    /// so all operations must be Match.
828    pub fn edges(&self, aln: Alignment) -> Vec<usize> {
829        let mut path: Vec<usize> = vec![];
830        let mut prev: NodeIndex<usize> = NodeIndex::new(0);
831        let mut _i: usize = 0;
832        for op in aln.operations {
833            match op {
834                AlignmentOperation::Match(None) => {
835                    _i += 1;
836                }
837                AlignmentOperation::Match(Some((_, p))) => {
838                    let node = NodeIndex::new(p);
839                    let edge = self.graph.find_edge(prev, node).unwrap();
840                    path.push(edge.index());
841                    prev = NodeIndex::new(p);
842                    _i += 1;
843                }
844                AlignmentOperation::Ins(None) => {}
845                AlignmentOperation::Ins(Some(_)) => {}
846                AlignmentOperation::Del(_) => {}
847                AlignmentOperation::Xclip(_) => {}
848                AlignmentOperation::Yclip(_, _) => {}
849            }
850        }
851        path
852    }
853
854    /// Incorporate a new sequence into a graph from an alignment
855    ///
856    /// # Arguments
857    ///
858    /// * `aln` - The alignment of the new sequence to the graph
859    /// * `seq` - The sequence being incorporated
860    pub fn add_alignment(&mut self, aln: &Alignment, seq: TextSlice) {
861        let head = Topo::new(&self.graph).next(&self.graph).unwrap();
862        let mut prev: NodeIndex<usize> = NodeIndex::new(head.index());
863        let mut i: usize = 0;
864        let mut edge_not_connected: bool = false;
865        for op in &aln.operations {
866            match op {
867                AlignmentOperation::Match(None) => {
868                    let node: NodeIndex<usize> = NodeIndex::new(head.index());
869                    if (seq[i] != self.graph.raw_nodes()[head.index()].weight) && (seq[i] != b'X') {
870                        let node = self.graph.add_node(seq[i]);
871                        if edge_not_connected {
872                            self.graph.add_edge(prev, node, 1);
873                        }
874                        edge_not_connected = false;
875                        prev = node;
876                    }
877                    if edge_not_connected {
878                        self.graph.add_edge(prev, node, 1);
879                        prev = node;
880                        edge_not_connected = false;
881                    }
882                    i += 1;
883                }
884                AlignmentOperation::Match(Some((_, p))) => {
885                    let node = NodeIndex::new(*p);
886                    if (seq[i] != self.graph.raw_nodes()[*p].weight) && (seq[i] != b'X') {
887                        let node = self.graph.add_node(seq[i]);
888                        self.graph.add_edge(prev, node, 1);
889                        prev = node;
890                    } else {
891                        // increment node weight
892                        match self.graph.find_edge(prev, node) {
893                            Some(edge) => {
894                                *self.graph.edge_weight_mut(edge).unwrap() += 1;
895                            }
896                            None => {
897                                if prev.index() != head.index() && prev.index() != node.index() {
898                                    self.graph.add_edge(prev, node, 1);
899                                }
900                            }
901                        }
902                        prev = NodeIndex::new(*p);
903                    }
904                    i += 1;
905                }
906                AlignmentOperation::Ins(None) => {
907                    let node = self.graph.add_node(seq[i]);
908                    if edge_not_connected {
909                        self.graph.add_edge(prev, node, 1);
910                    }
911                    prev = node;
912                    edge_not_connected = true;
913                    i += 1;
914                }
915                AlignmentOperation::Ins(Some(_)) => {
916                    let node = self.graph.add_node(seq[i]);
917                    self.graph.add_edge(prev, node, 1);
918                    prev = node;
919                    i += 1;
920                }
921                AlignmentOperation::Del(_) => {} // we should only have to skip over deleted nodes and xclip
922                AlignmentOperation::Xclip(_) => {}
923                AlignmentOperation::Yclip(_, r) => {
924                    i = *r;
925                }
926            }
927        }
928    }
929}
930
931#[cfg(test)]
932mod tests {
933    use super::*;
934    use crate::alignment::pairwise::Scoring;
935    use petgraph::dot::Dot;
936    use petgraph::graph::NodeIndex;
937
938    #[test]
939    fn test_init_graph() {
940        // sanity check for String -> Graph
941
942        let scoring = Scoring::new(-1, 0, |a: u8, b: u8| if a == b { 1i32 } else { -1i32 });
943        let poa = Poa::from_string(scoring, b"123456789");
944        assert!(poa.graph.is_directed());
945        assert_eq!(poa.graph.node_count(), 9);
946        assert_eq!(poa.graph.edge_count(), 8);
947    }
948
949    #[test]
950    fn test_alignment() {
951        let scoring = Scoring::new(-1, 0, |a: u8, b: u8| if a == b { 1i32 } else { -1i32 });
952        // examples from the POA paper
953        //let _seq1 = b"PKMIVRPQKNETV";
954        //let _seq2 = b"THKMLVRNETIM";
955        let poa = Poa::from_string(scoring, b"GATTACA");
956        let traceback = poa.custom(b"GCATGCU");
957        let alignment = poa.recalculate_alignment(&traceback);
958        assert_eq!(alignment.score, 0);
959
960        let traceback = poa.custom(b"GCATGCUx");
961        let alignment = poa.recalculate_alignment(&traceback);
962        assert_eq!(alignment.score, -1);
963
964        let traceback = poa.custom(b"xCATGCU");
965        let alignment = poa.recalculate_alignment(&traceback);
966        assert_eq!(alignment.score, -2);
967    }
968
969    #[test]
970    fn test_branched_alignment() {
971        let scoring = Scoring::new(-1, 0, |a: u8, b: u8| if a == b { 1i32 } else { -1i32 });
972        let seq1 = b"TTTTT";
973        let seq2 = b"TTATT";
974        let mut poa = Poa::from_string(scoring, seq1);
975        let head: NodeIndex<usize> = NodeIndex::new(1);
976        let tail: NodeIndex<usize> = NodeIndex::new(2);
977        let node1 = poa.graph.add_node(b'A');
978        let node2 = poa.graph.add_node(b'A');
979        poa.graph.add_edge(head, node1, 1);
980        poa.graph.add_edge(node1, node2, 1);
981        poa.graph.add_edge(node2, tail, 1);
982        let traceback = poa.custom(seq2);
983        let alignment = poa.recalculate_alignment(&traceback);
984        assert_eq!(alignment.score, 3);
985    }
986
987    #[test]
988    fn test_alt_branched_alignment() {
989        let scoring = Scoring::new(-1, 0, |a: u8, b: u8| if a == b { 1i32 } else { -1i32 });
990
991        let seq1 = b"TTCCTTAA";
992        let seq2 = b"TTTTGGAA";
993        let mut poa = Poa::from_string(scoring, seq1);
994        let head: NodeIndex<usize> = NodeIndex::new(1);
995        let tail: NodeIndex<usize> = NodeIndex::new(2);
996        let node1 = poa.graph.add_node(b'A');
997        let node2 = poa.graph.add_node(b'A');
998        poa.graph.add_edge(head, node1, 1);
999        poa.graph.add_edge(node1, node2, 1);
1000        poa.graph.add_edge(node2, tail, 1);
1001        let traceback = poa.custom(seq2);
1002        let alignment = poa.recalculate_alignment(&traceback);
1003        poa.add_alignment(&alignment, seq2);
1004        assert_eq!(poa.graph.edge_count(), 14);
1005        assert!(poa
1006            .graph
1007            .contains_edge(NodeIndex::new(5), NodeIndex::new(10)));
1008        assert!(poa
1009            .graph
1010            .contains_edge(NodeIndex::new(11), NodeIndex::new(6)));
1011    }
1012
1013    #[test]
1014    fn test_insertion_on_branch() {
1015        let scoring = Scoring::new(-1, 0, |a: u8, b: u8| if a == b { 1i32 } else { -1i32 });
1016
1017        let seq1 = b"TTCCGGTTTAA";
1018        let seq2 = b"TTGGTATGGGAA";
1019        let seq3 = b"TTGGTTTGCGAA";
1020        let mut poa = Poa::from_string(scoring, seq1);
1021        let head: NodeIndex<usize> = NodeIndex::new(1);
1022        let tail: NodeIndex<usize> = NodeIndex::new(2);
1023        let node1 = poa.graph.add_node(b'C');
1024        let node2 = poa.graph.add_node(b'C');
1025        let node3 = poa.graph.add_node(b'C');
1026        poa.graph.add_edge(head, node1, 1);
1027        poa.graph.add_edge(node1, node2, 1);
1028        poa.graph.add_edge(node2, node3, 1);
1029        poa.graph.add_edge(node3, tail, 1);
1030        let traceback = poa.custom(seq2);
1031        let alignment = poa.recalculate_alignment(&traceback);
1032        assert_eq!(alignment.score, 2);
1033        poa.add_alignment(&alignment, seq2);
1034        let traceback = poa.custom(seq3);
1035        let alignment2 = poa.recalculate_alignment(&traceback);
1036
1037        assert_eq!(alignment2.score, 10);
1038    }
1039
1040    #[test]
1041    fn test_poa_method_chaining() {
1042        let scoring = Scoring::new(-1, 0, |a: u8, b: u8| if a == b { 1i32 } else { -1i32 });
1043        let mut aligner = Aligner::new(scoring, b"TTCCGGTTTAA");
1044        aligner
1045            .global(b"TTGGTATGGGAA")
1046            .add_to_graph()
1047            .global(b"TTGGTTTGCGAA");
1048        assert_eq!(aligner.alignment().score, 10);
1049    }
1050
1051    #[test]
1052    fn test_global_banded() {
1053        // need strings long enough for the band to matter
1054        // create MSA with and without band and make sure they are the same
1055        let s1 = b"TGGCATGCTCAAGGACCGTTGAATACTATCTTAATGGACCGCAAGCTCCCTGAAGGTGGGCCACATTCGAGGGCC\
1056        CGGCCTCCACCTATTCCCAACGAAACTAGCATTAACATGGACAGGGGCGCATAAAACAGAGTTTCTCCTAATCCCCTTTCCCCTG\
1057        GAGTGCTAGTCAGAACCGCACATGTTGACGCTTTGGTCAGGTGTAGCCGATTCACTACCCGGGGTAGTACGAGTGGTAGCACCAT\
1058        GGTTAGCTTCTCCGGGATGTTCCGCGAAGAGAGCGGAGCGGGCGTGCACAAGCTCGGACAACCCTAGTGTGCATCAAATGCCATA\
1059        TGTTCTGCTTTGTCTGTGACTCACGCCCACGTTTGACATCACTCTTACTATCCAACGGGCCAAGCTTAGGAGGGGCGGACCTATT\
1060        GAACCATTAGAGGGGATCCTTCTGAAGTTAAGGCACAGCGTTGAGGGGCTATAGTCGATCCTCTTAGTAAATATAATGGACAGGT\
1061        CTTTACGACACAGTATGAATTAGTCCAATGGAGCCATTGTAATCGATGAAACTGTTATATCTGTTGGCCTAGTCGCAACGGTCTA\
1062        CATCGCTAGCGTAACGGTTAAGACCTCTTCCACGAGTGGGACACTCATAAAGCTCGCGGCCCTTACGATCTAGGGGAGCGCACTC\
1063        CGTAGTCAATCACGGCCAGCCGGTGTGCGCTAAGTTACGAAACAGTCACGAGCGATGAACCGTATGAAGAATGGACCCTTCTAAG\
1064        ATGTGAACACCTAGATGAGCAGCAAGACAATTGCTCTCGCCGACTCGTTCGAAAGTGTACCTCGAGAGCACAACACGCATTACCC\
1065        AGGTGACCGTGTATTGACTGCCCGTTACTCAGAAACCTTACAGTATTAATCGCCTAGTCTGTATAGTATTCATTCTGCCCGTGAC
1066        ATGCGGGAAGCCTGCTGAGATTGGCAGCGTCTTTGGAGGGTTACCAAGCGAGGACACGGGCAAATTGAGGTGT";
1067        let s2 = b"TGGCTACATGCTCAAGCATCGTTGAAGCTCATCTTAATGGACCGCAACGGCCGCCTGAAGGTGGGACACGTGACG\
1068        GGCGGGGGCCCGGCCTTAACCCATTCTCAAGCAACTAGCATACTGGACAGCGGCGCATATACAGAGAATCGCCTAAACCCACTTT\
1069        TGCCCTGAGTGCTAGTCAGTCCCCCACATCTGACACTTCGGTGGCGCACGTTTAGCAGCTTACACTACCCGGGGCAGTACGAGTG\
1070        CTAGCACGGTAGCCTCCGGAGGGCTGCGAGGAATAGAACGGAGAGGGCGTCCTCAAGCCGGACAACCCTAGTGTGCATCAAATGA\
1071        TGCCTGCTGATTTTCTGTGCATTTCACGCCCAATTCACAATCACTCCTACTATCCAACGGGCAAGCATAAGGGAGGGGGGGAGTA\
1072        CGTCTATTGCACCATTAGAGGGGTACTTCGAATTCGTTGAACTGAGATAGAGTCGATCCTCTTTGTATATAAACGCAGGTACTTT\
1073        GCTATAAGGTGAATTATTCAAATGGAGCCATTGTAATCGATGACAATGTTATACCTTTAGGCCTAGATCAACGGTCTCCATCGCA\
1074        AGCGTAACGATTATGACCCAACGAGTGGACACTCATAGAGCGGCCCTTACGAGCTAGGCGAGCGCAATCCGTGTGAATCACAGCC\
1075        AGACGGGATGTTGCGTTAAGCTACGAAACATCACGCGGTGAGCGTATGAATAATGGACCCGTCAAAATGTGGGCAGCGAGCAGCA\
1076        GGACAATTGCTCGGGTCCGGTAGCGACTCGTTCGAAACTGTAAACGTCGAGGCACAACACGCATTAGCCAGGTGAACATGTATTG\
1077        ACCGCCCCGTAGATACCTTACAGTATTAATCGCCTAGTCTGTATGATCTTCGTTCTGCCTGTGAACATGAGGGAAGCCTGCTTGA\
1078        GTTTGGCAGCGTCTTTGGGGTTTCCAAGCGAGCGACACGGGCAAATTGAGGTGT";
1079        let s3 = b"TGGCATGCTCAAGGAGTGCTGAAGCTCATTTTAATGGACCGCAACGGCCGCCTGAAGGTGGGGCACGTGACGGGC\
1080        GAGGGCCCGGCCTTAACCCATTCTCAAGAAACTCGTATACTGGACAGCGGCGCATATAGAGAGATTCTCCTAAACCCTCTTTTGC\
1081        CCTGACATGTGCTAGTCAGTCGCCCACATCTGAACACTTCGGCAGCGCACGTCTAGCAGCTTACACTACCGGGCGGGGCAGGTAC\
1082        GAGTGCTAGCACGGTAGCCTCTCCCGGAGTGCTGGGAATAGAAGGGAGAGGGCGTCCTCATGCCGGCGACCCTAGTGTGCATCAA\
1083        ATGAGATGCCTGCTGTGATTTTCACATTCACAATCACTCTTACCATCCCAACGGGACAAGCATAAGGGAGGGGGGGAGCTATTGA\
1084        ACCAAGAGGGGTCCTCCGGAATTCGTTGAGCTGCGATAGAGTCGATCCTCTTTGTATATAAACGCAGGTACTTTGCGATTAGGTG\
1085        AAGTATTCAAATGGAGCCATTGTAATCGATGACAATGTGATGCCTTTAGGCCTAGATCACGGTCTACATCGCGTAAGCGTAACGA\
1086        TTATGACCCAACGAGAGGCACACTCATAAAGCGCGGCCCTTACTAGCTAGGCGAGCTTAGCAATCCGTGCAATCACACCCAGACG\
1087        GGTTGAGCTAAGCTACGGAACACCACGCGATGAGCCGTATGAAGAATGGACCCGTCGAAAATGTGGACAGCGAGCATCAGGACAA\
1088        TTGCTCGGGTCCGCGACTCGTGCGGAACTGTAAACGTCGAGGCACAACACGATTAGCCAGGTGAACATGTAGACCGCCCCGTAGA\
1089        TATTTTACAGTATTAATCGCCTAGTCTGTATAGGATCTTCGTTCTGCCTGTGAACATGCGGGAAGCCTGCTTGAGATTGGCAGCG\
1090        TCTTTGGGCAAGCGAGGACACGGGCAAATCGAGGTGG";
1091        let scoring = Scoring::from_scores(-2, -2, 2, -4);
1092        let mut aligner_banded = Aligner::new(scoring, s1);
1093        aligner_banded.global_banded(s2, 25).add_to_graph();
1094        aligner_banded.global_banded(s3, 25);
1095        let scoring = Scoring::from_scores(-2, -2, 2, -4);
1096        let mut aligner_unbanded = Aligner::new(scoring, s1);
1097        aligner_unbanded.global(s2).add_to_graph();
1098        aligner_unbanded.global(s3);
1099        let alignment_banded = aligner_banded.alignment();
1100        let alignment_unbanded = aligner_unbanded.alignment();
1101        for (i, operation) in alignment_banded.operations.iter().enumerate() {
1102            assert_eq!(*operation, alignment_unbanded.operations[i]);
1103        }
1104    }
1105
1106    #[test]
1107    fn test_edge_cases() {
1108        // case 1
1109        let scoring = Scoring::new(-1, 0, |a: u8, b: u8| if a == b { 1i32 } else { -1i32 });
1110        let mut aligner = Aligner::new(scoring, b"BBA");
1111        aligner.global(b"AAA").add_to_graph();
1112        let g = aligner.graph().map(|_, n| (*n) as char, |_, e| *e);
1113        let dot = format!("{:?}", Dot::new(&g));
1114        let output = "digraph {\n    0 [ label = \"'B'\" ]\n    1 [ label = \"'B'\" ]\n    2 [ label = \"'A'\" ]\
1115        \n    3 [ label = \"'A'\" ]\n    4 [ label = \"'A'\" ]\n    0 -> 1 [ label = \"1\" ]\n    1 -> 2 [ label = \"1\" ]\
1116        \n    3 -> 4 [ label = \"1\" ]\n    4 -> 2 [ label = \"1\" ]\n}\n".to_string();
1117        assert_eq!(dot, output);
1118        // case 2
1119        let scoring = Scoring::new(-1, 0, |a: u8, b: u8| if a == b { 1i32 } else { -1i32 });
1120        let mut aligner = Aligner::new(scoring, b"AAA");
1121        aligner.global(b"ABA").add_to_graph();
1122        let g = aligner.graph().map(|_, n| (*n) as char, |_, e| *e);
1123        let dot = format!("{:?}", Dot::new(&g));
1124        let output = "digraph {\n    0 [ label = \"'A'\" ]\n    1 [ label = \"'A'\" ]\n    2 [ label = \"'A'\" ]\
1125        \n    3 [ label = \"'B'\" ]\n    0 -> 1 [ label = \"1\" ]\n    1 -> 2 [ label = \"1\" ]\n    0 -> 3 [ label = \"1\" ]\
1126        \n    3 -> 2 [ label = \"1\" ]\n}\n".to_string();
1127        assert_eq!(dot, output);
1128        // case 3
1129        let scoring = Scoring::new(-1, 0, |a: u8, b: u8| if a == b { 1i32 } else { -1i32 });
1130        let mut aligner = Aligner::new(scoring, b"BBBBBAAA");
1131        aligner.global(b"AAA").add_to_graph();
1132        let g = aligner.graph().map(|_, n| (*n) as char, |_, e| *e);
1133        let dot = format!("{:?}", Dot::new(&g));
1134        let output = "digraph {\n    0 [ label = \"'B'\" ]\n    1 [ label = \"'B'\" ]\n    2 [ label = \"'B'\" ]\
1135        \n    3 [ label = \"'B'\" ]\n    4 [ label = \"'B'\" ]\n    5 [ label = \"'A'\" ]\n    6 [ label = \"'A'\" ]\
1136        \n    7 [ label = \"'A'\" ]\n    0 -> 1 [ label = \"1\" ]\n    1 -> 2 [ label = \"1\" ]\n    2 -> 3 [ label = \"1\" ]\
1137        \n    3 -> 4 [ label = \"1\" ]\n    4 -> 5 [ label = \"1\" ]\n    5 -> 6 [ label = \"2\" ]\n    6 -> 7 [ label = \"2\" ]\n}\n".to_string();
1138        assert_eq!(dot, output);
1139        // case 4
1140        let scoring = Scoring::new(-1, 0, |a: u8, b: u8| if a == b { 1i32 } else { -1i32 });
1141        let mut aligner = Aligner::new(scoring, b"AAA");
1142        aligner.global(b"BBBBBAAA").add_to_graph();
1143        let g = aligner.graph().map(|_, n| (*n) as char, |_, e| *e);
1144        let dot = format!("{:?}", Dot::new(&g));
1145        let output = "digraph {\n    0 [ label = \"'A'\" ]\n    1 [ label = \"'A'\" ]\n    2 [ label = \"'A'\" ]\
1146        \n    3 [ label = \"'B'\" ]\n    4 [ label = \"'B'\" ]\n    5 [ label = \"'B'\" ]\n    6 [ label = \"'B'\" ]\
1147        \n    7 [ label = \"'B'\" ]\n    0 -> 1 [ label = \"2\" ]\n    1 -> 2 [ label = \"2\" ]\n    3 -> 4 [ label = \"1\" ]\
1148        \n    4 -> 5 [ label = \"1\" ]\n    5 -> 6 [ label = \"1\" ]\n    6 -> 7 [ label = \"1\" ]\n    7 -> 0 [ label = \"1\" ]\n}\n".to_string();
1149        assert_eq!(dot, output);
1150    }
1151
1152    #[test]
1153    fn test_consensus() {
1154        let scoring = Scoring::new(-1, 0, |a: u8, b: u8| if a == b { 1i32 } else { -1i32 });
1155        let mut aligner = Aligner::new(scoring, b"GCATGCUx");
1156        aligner.global(b"GCATGCU").add_to_graph();
1157        aligner.global(b"xCATGCU").add_to_graph();
1158        assert_eq!(aligner.consensus(), b"GCATGCUx");
1159    }
1160
1161    #[test]
1162    fn test_xclip_prefix_custom() {
1163        let x = b"GGGGGGATG";
1164        let y = b"ATG";
1165
1166        let score = |a: u8, b: u8| if a == b { 1i32 } else { -1i32 };
1167        let scoring = Scoring::new(-5, -1, &score).xclip(-5);
1168
1169        let mut aligner = Aligner::new(scoring, x);
1170        let alignment = aligner.custom(y).alignment();
1171
1172        assert_eq!(
1173            alignment.operations,
1174            [
1175                AlignmentOperation::Xclip(0),
1176                AlignmentOperation::Match(Some((5, 6))),
1177                AlignmentOperation::Match(Some((6, 7))),
1178                AlignmentOperation::Match(Some((7, 8)))
1179            ]
1180        );
1181    }
1182
1183    #[test]
1184    fn test_yclip_prefix_custom() {
1185        let y = b"GGGGGGATG";
1186        let x = b"ATG";
1187
1188        let score = |a: u8, b: u8| if a == b { 1i32 } else { -1i32 };
1189        let scoring = Scoring::new(-5, -1, &score).yclip(-5);
1190
1191        let mut aligner = Aligner::new(scoring, x);
1192        let alignment = aligner.custom(y).alignment();
1193
1194        assert_eq!(
1195            alignment.operations,
1196            [
1197                AlignmentOperation::Yclip(0, 6),
1198                AlignmentOperation::Match(None),
1199                AlignmentOperation::Match(Some((0, 1))),
1200                AlignmentOperation::Match(Some((1, 2)))
1201            ]
1202        );
1203    }
1204
1205    #[test]
1206    fn test_xclip_suffix_custom() {
1207        let x = b"GAAAA";
1208        let y = b"CG";
1209
1210        let score = |a: u8, b: u8| if a == b { 1i32 } else { -1i32 };
1211        let scoring = Scoring::new(-5, -1, &score).xclip(0).yclip(0);
1212
1213        let mut aligner = Aligner::new(scoring, x);
1214        let alignment = aligner.custom(y).alignment();
1215
1216        assert_eq!(
1217            alignment.operations,
1218            [
1219                AlignmentOperation::Yclip(0, 1),
1220                AlignmentOperation::Match(None),
1221                AlignmentOperation::Xclip(1)
1222            ]
1223        );
1224    }
1225
1226    #[test]
1227    fn test_yclip_suffix_custom() {
1228        let y = b"GAAAA";
1229        let x = b"CG";
1230
1231        let score = |a: u8, b: u8| if a == b { 3i32 } else { -3i32 };
1232        let scoring = Scoring::new(-5, -1, &score).yclip(-5).xclip(0);
1233
1234        let mut aligner = Aligner::new(scoring, x);
1235        let alignment = aligner.custom(y).alignment();
1236
1237        assert_eq!(
1238            alignment.operations,
1239            [
1240                AlignmentOperation::Xclip(0),
1241                AlignmentOperation::Match(Some((0, 1))),
1242                AlignmentOperation::Yclip(1, 5)
1243            ]
1244        );
1245    }
1246}