block_aligner/cigar.rs
1//! Data structures and functions for working with CIGAR strings.
2
3use std::fmt;
4
5/// A match/mismatch, insertion, or deletion operation.
6///
7/// When aligning `q` against `r`, this represents the edit operations to get from `r` to `q`.
8#[derive(Debug, PartialEq, Copy, Clone)]
9#[repr(u8)]
10pub enum Operation {
11 /// Placeholder variant.
12 Sentinel = 0u8,
13 /// Match or mismatch.
14 ///
15 /// This is a diagonal transition in the DP matrix with `|q| + 1` rows and `|r| + 1` columns.
16 M = 1u8,
17 /// Match.
18 Eq = 2u8,
19 /// Mismatch.
20 X = 3u8,
21 /// Insertion.
22 ///
23 /// When aligning sequences `q` against `r`, this is a gap in `r`.
24 /// This is a row transition in the DP matrix with `|q| + 1` rows and `|r| + 1` columns.
25 I = 4u8,
26 /// Deletion.
27 ///
28 /// When aligning sequences `q` against `r`, this is a gap in `q`.
29 /// This is a column transition in the DP matrix with `|q| + 1` rows and `|r| + 1` columns.
30 D = 5u8
31}
32
33/// An operation and how many times that operation is repeated.
34#[derive(Debug, Copy, Clone)]
35#[repr(C)]
36pub struct OpLen {
37 pub op: Operation,
38 pub len: usize
39}
40
41/// A CIGAR string that holds a list of operations.
42pub struct Cigar {
43 s: Vec<OpLen>,
44 idx: usize
45}
46
47impl Cigar {
48 /// Create a new CIGAR string with certain maximum lengths for the aligned sequences.
49 pub fn new(query_len: usize, reference_len: usize) -> Self {
50 let s = vec![OpLen { op: Operation::Sentinel, len: 0 }; query_len + reference_len + 5];
51 // first element should always be a sentinel
52 let idx = 1;
53 Cigar { s, idx }
54 }
55
56 /// Clear this CIGAR string.
57 #[allow(dead_code)]
58 pub(crate) fn clear(&mut self, query_len: usize, reference_len: usize) {
59 self.s[..query_len + reference_len + 5].fill(OpLen { op: Operation::Sentinel, len: 0 });
60 self.idx = 1;
61 }
62
63 /// Branchlessly add a new operation (in reverse order).
64 ///
65 /// Other methods should allow the CIGAR string to be viewed
66 /// in the correct (not reversed) order.
67 ///
68 /// The total number of added operations must not exceed the
69 /// maximum length the CIGAR string was created with.
70 #[allow(dead_code)]
71 pub(crate) unsafe fn add(&mut self, op: Operation) {
72 debug_assert!(self.idx < self.s.len());
73 // branchlessly append one operation
74 // make sure that contiguous operations are run-length encoded
75 let add = (op != (*self.s.as_ptr().add(self.idx - 1)).op) as usize;
76 self.idx += add;
77 (*self.s.as_mut_ptr().add(self.idx - 1)).op = op;
78 (*self.s.as_mut_ptr().add(self.idx - 1)).len += 1;
79 }
80
81 /// Reverse the CIGAR string in place.
82 pub fn reverse(&mut self) {
83 self.s[1..self.idx].reverse();
84 }
85
86 /// Length of the CIGAR string, not including the first sentinel.
87 pub fn len(&self) -> usize {
88 self.idx - 1
89 }
90
91 /// Get a certain operation in the CIGAR string.
92 pub fn get(&self, i: usize) -> OpLen {
93 self.s[self.idx - 1 - i]
94 }
95
96 /// Generate two strings to visualize the edit operations.
97 pub fn format(&self, q: &[u8], r: &[u8]) -> (String, String) {
98 let mut a = String::with_capacity(self.idx);
99 let mut b = String::with_capacity(self.idx);
100 let mut i = 0;
101 let mut j = 0;
102
103 for &op_len in self.s[1..self.idx].iter().rev() {
104 match op_len.op {
105 Operation::M | Operation::Eq | Operation::X => {
106 for _k in 0..op_len.len {
107 a.push(q[i] as char);
108 b.push(r[j] as char);
109 i += 1;
110 j += 1;
111 }
112 },
113 Operation::I => {
114 for _k in 0..op_len.len {
115 a.push(q[i] as char);
116 b.push('-');
117 i += 1;
118 }
119 },
120 Operation::D => {
121 for _k in 0..op_len.len {
122 a.push('-');
123 b.push(r[j] as char);
124 j += 1;
125 }
126 },
127 _ => continue
128 }
129 }
130
131 (a, b)
132 }
133
134 /// Create a copy of the operations in the CIGAR string and
135 /// ensure that the vector is provided in the correct order.
136 ///
137 /// Sentinels are removed.
138 pub fn to_vec(&self) -> Vec<OpLen> {
139 self.s[1..self.idx]
140 .iter()
141 .rev()
142 .map(|&op_len| op_len)
143 .collect::<Vec<OpLen>>()
144 }
145}
146
147impl fmt::Display for Cigar {
148 /// Print a CIGAR string in standard CIGAR format.
149 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
150 for &op_len in self.s[1..self.idx].iter().rev() {
151 let c = match op_len.op {
152 Operation::M => 'M',
153 Operation::Eq => '=',
154 Operation::X => 'X',
155 Operation::I => 'I',
156 Operation::D => 'D',
157 _ => continue
158 };
159 write!(f, "{}{}", op_len.len, c)?;
160 }
161 Ok(())
162 }
163}