/// Two-way lookup table between symbol letters and their numeric codes.
///
/// * `0..16` holds the upper-case letter for each code (`0 -> b'A'`, `1 -> b'C'`, ...).
/// * `16..64`, plus `64` (`'@'`) and `96` (the backtick), hold a blank (`b' '`).
/// * `65..96` and `97..128` hold the code of the corresponding letter, the same for
///   upper and lower case; every byte in those ranges that is not one of the sixteen
///   symbol letters holds `30`.
const CNV_NUM: [u8; 128] = {
    // Symbol letters ordered by their code (presumably the IUPAC nucleotide alphabet).
    const SYMBOLS: [u8; 16] = *b"ACGTNRYSWKMBDHVU";
    // Value stored for every non-symbol byte inside the two letter ranges.
    const NOT_A_SYMBOL: u8 = 30;
    // Bit that separates an upper-case ASCII letter from its lower-case form.
    const LOWER_CASE_BIT: usize = 0x20;

    let mut table = [b' '; 128];

    // Pre-fill both letter ranges; '@' (64) and '`' (96) are left blank on purpose.
    let mut upper = 65;
    while upper < 96 {
        table[upper] = NOT_A_SYMBOL;
        table[upper | LOWER_CASE_BIT] = NOT_A_SYMBOL;
        upper += 1;
    }

    // Fill in code -> letter and letter -> code (both cases) for each symbol.
    let mut code = 0;
    while code < SYMBOLS.len() {
        let letter = SYMBOLS[code] as usize;
        table[code] = SYMBOLS[code];
        table[letter] = code as u8;
        table[letter | LOWER_CASE_BIT] = code as u8;
        code += 1;
    }

    table
};
21
22pub fn preprocess_raw_contig(contig: &mut Vec<u8>) {
37 let len = contig.len();
38 let mut in_pos = 0usize;
39 let mut out_pos = 0usize;
40
41 match len % 4 {
43 3 => {
44 let c = contig[in_pos];
45 in_pos += 1;
46 if c >> 6 != 0 {
47 contig[out_pos] = CNV_NUM[c as usize];
49 out_pos += 1;
50 }
51 let c = contig[in_pos];
53 in_pos += 1;
54 if c >> 6 != 0 {
55 contig[out_pos] = CNV_NUM[c as usize];
56 out_pos += 1;
57 }
58 let c = contig[in_pos];
60 in_pos += 1;
61 if c >> 6 != 0 {
62 contig[out_pos] = CNV_NUM[c as usize];
63 out_pos += 1;
64 }
65 }
66 2 => {
67 let c = contig[in_pos];
68 in_pos += 1;
69 if c >> 6 != 0 {
70 contig[out_pos] = CNV_NUM[c as usize];
71 out_pos += 1;
72 }
73 let c = contig[in_pos];
75 in_pos += 1;
76 if c >> 6 != 0 {
77 contig[out_pos] = CNV_NUM[c as usize];
78 out_pos += 1;
79 }
80 }
81 1 => {
82 let c = contig[in_pos];
83 in_pos += 1;
84 if c >> 6 != 0 {
85 contig[out_pos] = CNV_NUM[c as usize];
86 out_pos += 1;
87 }
88 }
89 _ => {} }
91
92 while in_pos < len {
94 let c = contig[in_pos];
95 in_pos += 1;
96 if c >> 6 != 0 {
97 contig[out_pos] = CNV_NUM[c as usize];
98 out_pos += 1;
99 }
100
101 let c = contig[in_pos];
102 in_pos += 1;
103 if c >> 6 != 0 {
104 contig[out_pos] = CNV_NUM[c as usize];
105 out_pos += 1;
106 }
107
108 let c = contig[in_pos];
109 in_pos += 1;
110 if c >> 6 != 0 {
111 contig[out_pos] = CNV_NUM[c as usize];
112 out_pos += 1;
113 }
114
115 let c = contig[in_pos];
116 in_pos += 1;
117 if c >> 6 != 0 {
118 contig[out_pos] = CNV_NUM[c as usize];
119 out_pos += 1;
120 }
121 }
122
123 contig.truncate(out_pos);
124}
125
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `preprocess_raw_contig` on a copy of `raw` and returns the processed vector.
    fn encoded(raw: &[u8]) -> Vec<u8> {
        let mut contig = raw.to_vec();
        preprocess_raw_contig(&mut contig);
        contig
    }

    /// Four plain symbols map to their codes.
    #[test]
    fn test_preprocess_raw_contig_acgt() {
        assert_eq!(encoded(b"ACGT"), [0, 1, 2, 3]);
    }

    /// Blanks between symbols are removed.
    #[test]
    fn test_preprocess_raw_contig_with_spaces() {
        assert_eq!(encoded(b"A C G T"), [0, 1, 2, 3]);
    }

    /// A line break inside the sequence is removed; `N` maps to 4.
    #[test]
    fn test_preprocess_raw_contig_mixed() {
        assert_eq!(encoded(b"ACGTN\nATGC"), [0, 1, 2, 3, 4, 0, 3, 2, 1]);
    }

    /// Empty input stays empty.
    #[test]
    fn test_preprocess_raw_contig_empty() {
        assert!(encoded(b"").is_empty());
    }

    /// A one-byte input is handled.
    #[test]
    fn test_preprocess_raw_contig_single_symbol() {
        assert_eq!(encoded(b"G"), [2]);
    }

    /// Lower-case letters produce the same codes as upper-case ones.
    #[test]
    fn test_preprocess_raw_contig_lowercase() {
        assert_eq!(encoded(b"acgtn"), [0, 1, 2, 3, 4]);
    }

    /// The remaining symbol letters map to codes 5 through 15.
    #[test]
    fn test_preprocess_raw_contig_other_symbols() {
        assert_eq!(
            encoded(b"RYSWKMBDHVU"),
            [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
        );
    }

    /// Letters that are not symbols are kept and encoded as 30.
    #[test]
    fn test_preprocess_raw_contig_non_symbol_letters() {
        assert_eq!(encoded(b"XZ"), [30, 30]);
    }

    /// Input made up only of bytes below 64 yields an empty contig.
    #[test]
    fn test_preprocess_raw_contig_only_dropped_bytes() {
        assert!(encoded(b" \n\t12").is_empty());
    }
}