Skip to main content

fqtk_lib/
mod.rs

1pub mod barcode_matching;
2pub mod bitenc;
3pub mod samples;
4
5use crate::bitenc::BitEnc;
6
/// The four DNA bases followed by `N`.
pub const DNA_BASES: [u8; 5] = *b"ACGTN";
/// The fifteen IUPAC nucleotide symbols, single bases first and `N` last.
pub const IUPAC_BASES: [u8; 15] = *b"ACGTMRWSYKVHDBN";

/// Bit assigned to base `A` (value 1).
pub const BASE_A: usize = 1 << 0;
/// Bit assigned to base `C` (value 2).
pub const BASE_C: usize = 1 << 1;
/// Bit assigned to base `G` (value 4).
pub const BASE_G: usize = 1 << 2;
/// Bit assigned to base `T` (value 8).
pub const BASE_T: usize = 1 << 3;
/// All four base bits set (value 15).
///
/// NOTE(review): 15 is the union `A | C | G | T`, whereas the IUPAC symbol `B`
/// would be `C | G | T` (14) — confirm the intended meaning of this name.
pub const BASE_B: usize = BASE_A | BASE_C | BASE_G | BASE_T;
/// Lookup table from an ASCII byte to its DNA bit mask.
///
/// `A`, `C`, `G` and `T` map to the single bits 1, 2, 4 and 8; `U` shares the
/// `T` bit and `N` is the union of all four. Every other byte (including the
/// lower-case letters) maps to `0`.
pub const DNA_MASKS: [u8; 256] = {
    const A: u8 = 1;
    const C: u8 = 2;
    const G: u8 = 4;
    const T: u8 = 8;

    let mut table = [0u8; 256];
    table[b'A' as usize] = A;
    table[b'C' as usize] = C;
    table[b'G' as usize] = G;
    table[b'T' as usize] = T;
    // Uracil is treated as thymine.
    table[b'U' as usize] = T;
    table[b'N' as usize] = A | C | G | T;
    table
};
/// Lookup table from an ASCII byte to its IUPAC bit mask.
///
/// The single bases `A`, `C`, `G` and `T` own the bits 1, 2, 4 and 8 (`U`
/// shares the `T` bit). Each ambiguity symbol is the bitwise union of the
/// bases it stands for, up to `N`, which has all four bits set. Every other
/// byte (including the lower-case letters) maps to `0`.
pub const IUPAC_MASKS: [u8; 256] = {
    const A: u8 = 1;
    const C: u8 = 2;
    const G: u8 = 4;
    const T: u8 = 8;

    let mut table = [0u8; 256];

    // Unambiguous bases.
    table[b'A' as usize] = A;
    table[b'C' as usize] = C;
    table[b'G' as usize] = G;
    table[b'T' as usize] = T;
    table[b'U' as usize] = T;

    // Two-base ambiguity symbols.
    table[b'M' as usize] = A | C;
    table[b'R' as usize] = A | G;
    table[b'W' as usize] = A | T;
    table[b'S' as usize] = C | G;
    table[b'Y' as usize] = C | T;
    table[b'K' as usize] = G | T;

    // Three-base ambiguity symbols.
    table[b'V' as usize] = A | C | G;
    table[b'H' as usize] = A | C | T;
    table[b'D' as usize] = A | G | T;
    table[b'B' as usize] = C | G | T;

    // Any base.
    table[b'N' as usize] = A | C | G | T;

    table
};
47
48#[must_use]
49pub fn encode(bases: &[u8]) -> BitEnc {
50    let mut vec = BitEnc::with_capacity(4, bases.len());
51    for base in bases {
52        let bit: u8 = if byte_is_nocall(*base) {
53            IUPAC_MASKS[b'N' as usize]
54        } else {
55            let value = base.to_ascii_uppercase() as usize;
56            if value < 256 { IUPAC_MASKS[value] } else { 0 }
57        };
58        vec.push(bit);
59    }
60    vec
61}
62
63/// Decodes a DNA/IUPAC encoded squence.
64///
65/// # Panics
66/// when an invalid encoding is provided.
67#[must_use]
68pub fn decode(bases: &BitEnc) -> String {
69    let mut result = String::new();
70    for bit in bases.iter() {
71        let mut found = false;
72        for base in &IUPAC_BASES {
73            if IUPAC_MASKS[*base as usize] == bit {
74                result.push(*base as char);
75                found = true;
76                break;
77            }
78        }
79        assert!(found, "Invalid bit mask for base: {bit}");
80    }
81    result
82}
83
/// Returns `true` when `byte` is one of the "no-call" spellings: `N`, `n` or `.`.
fn byte_is_nocall(byte: u8) -> bool {
    matches!(byte, b'N' | b'n' | b'.')
}
88
89/// Checks whether a provided byte is an IUPAC or nocall.
90fn is_valid_iupac(byte: u8) -> bool {
91    IUPAC_MASKS[byte as usize] != 0 || byte_is_nocall(byte)
92}
93
#[cfg(test)]
mod tests {
    use super::*;

    /// Every byte spelling that is treated as a no-call.
    const NO_CALLS: [u8; 3] = [b'N', b'n', b'.'];

    /// Encodes a single byte and returns the one value stored for it.
    fn encode_one(base: u8) -> u8 {
        encode(&[base]).get(0).unwrap()
    }

    // --------------------------------------------------------------------------------------------
    // Byte classification: byte_is_nocall / is_valid_iupac
    // --------------------------------------------------------------------------------------------

    /// No-call bytes are recognised; called bases in either case are not.
    #[test]
    fn test_byte_is_no_call() {
        for byte in NO_CALLS {
            assert!(byte_is_nocall(byte));
        }
        for byte in *b"ACGTacgt" {
            assert!(!byte_is_nocall(byte));
        }
    }

    /// No-calls and upper-case IUPAC symbols are valid; lower-case bases are not.
    #[test]
    fn test_is_valid_iupac() {
        for byte in NO_CALLS {
            assert!(is_valid_iupac(byte));
        }
        for byte in *b"acgt" {
            assert!(!is_valid_iupac(byte));
        }
        for byte in IUPAC_BASES {
            assert!(is_valid_iupac(byte));
        }
    }

    // --------------------------------------------------------------------------------------------
    // encode / decode
    // --------------------------------------------------------------------------------------------

    /// Each DNA base encodes to its entry in the IUPAC mask table.
    #[test]
    fn test_encode_dna_bases() {
        for base in DNA_BASES {
            assert_eq!(encode_one(base), IUPAC_MASKS[base as usize]);
        }
    }

    /// Encoding then decoding the DNA bases reproduces them.
    #[test]
    fn test_decode_dna_bases() {
        let round_trip = decode(&encode(&DNA_BASES));
        assert_eq!(DNA_BASES, round_trip.as_bytes());
    }

    /// Each IUPAC symbol encodes to its entry in the IUPAC mask table.
    #[test]
    fn test_encode_iupac_bases() {
        for base in IUPAC_BASES {
            assert_eq!(encode_one(base), IUPAC_MASKS[base as usize]);
        }
    }

    /// Encoding then decoding the IUPAC symbols reproduces them.
    #[test]
    fn test_decode_iupac_bases() {
        let round_trip = decode(&encode(&IUPAC_BASES));
        assert_eq!(IUPAC_BASES, round_trip.as_bytes());
    }

    /// Every no-call spelling encodes to the all-bases mask (15).
    #[test]
    fn test_encode_no_calls() {
        for base in NO_CALLS {
            assert_eq!(usize::from(encode_one(base)), 15);
        }
    }

    /// Every no-call spelling decodes to `N`.
    #[test]
    fn test_decode_no_calls() {
        assert_eq!(decode(&encode(&NO_CALLS)), "NNN".to_string());
    }
}