1pub mod barcode_matching;
2pub mod bitenc;
3pub mod samples;
4
5use crate::bitenc::BitEnc;
6
/// The four unambiguous DNA bases followed by the no-call base `N`.
pub const DNA_BASES: [u8; 5] = *b"ACGTN";
/// The fifteen IUPAC nucleotide codes: the four bases, the ambiguity codes, then `N`.
pub const IUPAC_BASES: [u8; 15] = *b"ACGTMRWSYKVHDBN";

/// Bit value 1, the same bit used for `A` in the mask tables of this file.
pub const BASE_A: usize = 0b0001;
/// Bit value 2, the same bit used for `C` in the mask tables of this file.
pub const BASE_C: usize = 0b0010;
/// Bit value 4, the same bit used for `G` in the mask tables of this file.
pub const BASE_G: usize = 0b0100;
/// Bit value 8, the same bit used for `T` in the mask tables of this file.
pub const BASE_T: usize = 0b1000;
/// All four base bits set (15).
///
/// NOTE(review): 15 is the value stored for `N` in `IUPAC_MASKS`, whereas the IUPAC code `B`
/// is stored there as 14 (`C | G | T`). Confirm which meaning callers of this constant expect.
pub const BASE_B: usize = 0b1111;
/// Lookup table from an ASCII byte to its 4-bit DNA mask.
///
/// `A`, `C`, `G` and `T` each own a single bit (1, 2, 4 and 8). `U` shares the `T` bit and `N`
/// sets all four bits. Only these upper-case letters are populated; every other byte maps to `0`.
pub const DNA_MASKS: [u8; 256] = {
    const A: u8 = 0b0001;
    const C: u8 = 0b0010;
    const G: u8 = 0b0100;
    const T: u8 = 0b1000;

    let mut table = [0u8; 256];
    table[b'A' as usize] = A;
    table[b'C' as usize] = C;
    table[b'G' as usize] = G;
    table[b'T' as usize] = T;
    // Uracil is treated as thymine.
    table[b'U' as usize] = T;
    table[b'N' as usize] = A | C | G | T;
    table
};
/// Lookup table from an ASCII byte to its 4-bit IUPAC mask.
///
/// Each of `A`, `C`, `G` and `T` owns a single bit (1, 2, 4 and 8), and `U` shares the `T` bit.
/// An ambiguity code is the bitwise OR of the bases it may stand for, so `N` sets all four bits.
/// Only upper-case letters are populated; every other byte maps to `0`.
pub const IUPAC_MASKS: [u8; 256] = {
    const A: u8 = 0b0001;
    const C: u8 = 0b0010;
    const G: u8 = 0b0100;
    const T: u8 = 0b1000;

    // (symbol, mask) pairs written into the table below.
    const CODES: [(u8, u8); 16] = [
        (b'A', A),
        (b'C', C),
        (b'G', G),
        (b'T', T),
        (b'U', T),
        (b'M', A | C),
        (b'R', A | G),
        (b'W', A | T),
        (b'S', C | G),
        (b'Y', C | T),
        (b'K', G | T),
        (b'V', A | C | G),
        (b'H', A | C | T),
        (b'D', A | G | T),
        (b'B', C | G | T),
        (b'N', A | C | G | T),
    ];

    let mut table = [0u8; 256];
    // `for` loops are not available in const evaluation, hence the manual index.
    let mut idx = 0;
    while idx < CODES.len() {
        let (symbol, mask) = CODES[idx];
        table[symbol as usize] = mask;
        idx += 1;
    }
    table
};
47
48#[must_use]
49pub fn encode(bases: &[u8]) -> BitEnc {
50 let mut vec = BitEnc::with_capacity(4, bases.len());
51 for base in bases {
52 let bit: u8 = if byte_is_nocall(*base) {
53 IUPAC_MASKS[b'N' as usize]
54 } else {
55 let value = base.to_ascii_uppercase() as usize;
56 if value < 256 { IUPAC_MASKS[value] } else { 0 }
57 };
58 vec.push(bit);
59 }
60 vec
61}
62
63#[must_use]
68pub fn decode(bases: &BitEnc) -> String {
69 let mut result = String::new();
70 for bit in bases.iter() {
71 let mut found = false;
72 for base in &IUPAC_BASES {
73 if IUPAC_MASKS[*base as usize] == bit {
74 result.push(*base as char);
75 found = true;
76 break;
77 }
78 }
79 assert!(found, "Invalid bit mask for base: {bit}");
80 }
81 result
82}
83
/// Returns `true` when `byte` is one of the no-call symbols `N`, `n` or `.`.
fn byte_is_nocall(byte: u8) -> bool {
    matches!(byte, b'N' | b'n' | b'.')
}
88
89fn is_valid_iupac(byte: u8) -> bool {
91 IUPAC_MASKS[byte as usize] != 0 || byte_is_nocall(byte)
92}
93
#[cfg(test)]
mod tests {
    use super::*;

    /// Bytes that must be treated as no-calls.
    const NO_CALLS: [u8; 3] = [b'N', b'n', b'.'];

    /// Encodes a single byte and returns the mask stored at position 0.
    fn encode_one(base: u8) -> u8 {
        encode(&[base]).get(0).unwrap()
    }

    #[test]
    fn test_byte_is_no_call() {
        for no_call in NO_CALLS {
            assert!(byte_is_nocall(no_call));
        }
        for called in [b'A', b'C', b'G', b'T', b'a', b'c', b'g', b't'] {
            assert!(!byte_is_nocall(called));
        }
    }

    #[test]
    fn test_is_valid_iupac() {
        for no_call in NO_CALLS {
            assert!(is_valid_iupac(no_call));
        }
        // Lower-case bases are not entries of the mask table and are not no-calls.
        for lower in [b'a', b'c', b'g', b't'] {
            assert!(!is_valid_iupac(lower));
        }
        for base in IUPAC_BASES {
            assert!(is_valid_iupac(base));
        }
    }

    #[test]
    fn test_encode_dna_bases() {
        for base in DNA_BASES {
            assert_eq!(encode_one(base), IUPAC_MASKS[base as usize]);
        }
    }

    #[test]
    fn test_decode_dna_bases() {
        let round_trip = decode(&encode(&DNA_BASES));
        assert_eq!(DNA_BASES, round_trip.as_bytes());
    }

    #[test]
    fn test_encode_iupac_bases() {
        for base in IUPAC_BASES {
            assert_eq!(encode_one(base), IUPAC_MASKS[base as usize]);
        }
    }

    #[test]
    fn test_decode_iupac_bases() {
        let round_trip = decode(&encode(&IUPAC_BASES));
        assert_eq!(IUPAC_BASES, round_trip.as_bytes());
    }

    #[test]
    fn test_encode_no_calls() {
        for base in NO_CALLS {
            assert_eq!(usize::from(encode_one(base)), 15);
        }
    }

    #[test]
    fn test_decode_no_calls() {
        // Every no-call spelling decodes to the canonical `N`.
        assert_eq!(decode(&encode(&NO_CALLS)), "NNN");
    }
}