bio_seq/codec/masked/
iupac.rs

1use crate::codec::Codec;
2//use crate::{Complement, Maskable, Reverse, ReverseComplement};
3use crate::{ComplementMut, Maskable, MaskableMut}; //, ReverseComplementMut, ReverseMut};
4
5/// 5-bit encoding for maskable IUPAC symbols
6/// The middle bit is the mask flag and symbols are complemented by reversing the bit pattern
7#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Codec)]
8#[bits(5)]
9#[repr(u8)]
10pub enum Iupac {
11    A = 0b10000,
12    C = 0b01000,
13    G = 0b00010,
14    T = 0b00001,
15
16    Y = 0b01001,
17    R = 0b10010,
18    W = 0b10001,
19    S = 0b01010,
20
21    K = 0b00011,
22    M = 0b11000,
23    D = 0b10011,
24    V = 0b11010,
25
26    H = 0b11001,
27    B = 0b01011,
28
29    N = 0b11011,
30    #[display('-')]
31    X = 0b00000,
32
33    #[display('a')]
34    AMasked = 0b10100,
35    #[display('c')]
36    CMasked = 0b01100,
37    #[display('g')]
38    GMasked = 0b00110,
39    #[display('t')]
40    TMasked = 0b00101,
41
42    #[display('y')]
43    YMasked = 0b01101,
44    #[display('r')]
45    RMasked = 0b10110,
46    #[display('w')]
47    WMasked = 0b10101,
48    #[display('s')]
49    SMasked = 0b01110,
50
51    #[display('k')]
52    KMasked = 0b00111,
53    #[display('m')]
54    MMasked = 0b11100,
55    #[display('d')]
56    DMasked = 0b10111,
57    #[display('v')]
58    VMasked = 0b11110,
59
60    #[display('h')]
61    HMasked = 0b11101,
62    #[display('b')]
63    BMasked = 0b01111,
64
65    #[display('n')]
66    NMasked = 0b11111,
67    #[display('.')]
68    XMasked = 0b00100,
69}
70
71#[allow(clippy::cast_possible_truncation)]
72impl ComplementMut for Iupac {
73    /// This representation can be complemented by reversing the bit pattern
74    fn comp(&mut self) {
75        let bits = self.to_bits();
76        *self = Self::unsafe_from_bits(
77            ((bits & 0b10000) >> 4)
78                | ((bits & 0b01000) >> 2)
79                | (bits & 0b00100)
80                | ((bits & 0b00010) << 2)
81                | ((bits & 0b00001) << 4),
82        );
83    }
84}
85
86impl MaskableMut for Iupac {
87    /// Setting the middle bit sets the mask flag
88    fn mask(&mut self) {
89        let b = *self as u8 | 0b00100;
90        *self = Self::unsafe_from_bits(b);
91    }
92
93    /// Unsetting the middle bit clears the mask flag
94    fn unmask(&mut self) {
95        let b = *self as u8 & 0b11011;
96        *self = Self::unsafe_from_bits(b);
97    }
98}
99
100impl Maskable for Iupac {}
101
#[cfg(test)]
mod tests {
    use crate::codec::masked;
    use crate::prelude::*;

    /// Input mixing upper-case, lower-case, `-` and `.` symbols.
    const MIXED: &str = "A.TCGCgtcataN--A";
    /// `MIXED` with every symbol masked.
    const MASKED: &str = "a.tcgcgtcatan..a";
    /// `MIXED` with every symbol unmasked.
    const UNMASKED: &str = "A-TCGCGTCATAN--A";
    /// Complement of `MIXED`; the case of each symbol is kept.
    const COMPLEMENT: &str = "T.AGCGcagtatN--T";

    /// Masking and unmasking, both through the copying calls and in place.
    #[test]
    fn mask_iupac_seq() {
        let mut seq = Seq::<masked::Iupac>::try_from(MIXED).unwrap();

        assert_eq!(seq.to_mask().to_string(), MASKED);

        seq.mask();
        // Check the in-place result directly: the `to_unmask` assertion below
        // would pass even if `mask()` had done nothing.
        assert_eq!(seq.to_string(), MASKED);
        assert_eq!(seq.to_unmask().to_string(), UNMASKED);

        seq.unmask();
        // Same reasoning for `unmask()` and the `to_mask` assertion below.
        assert_eq!(seq.to_string(), UNMASKED);
        assert_eq!(seq.to_mask().to_string(), MASKED);
    }

    /// Complementing swaps bases, keeps case, and undoes itself when repeated.
    #[test]
    fn comp_iupac_seq() {
        let mut seq = Seq::<masked::Iupac>::try_from(MIXED).unwrap();

        assert_ne!(seq.to_comp().to_string(), MIXED);
        assert_eq!(seq.to_comp().to_string(), COMPLEMENT);

        seq.comp();
        assert_eq!(seq.to_string(), COMPLEMENT);
        // Complementing the complemented sequence restores the input.
        assert_eq!(seq.to_comp().to_string(), MIXED);
    }
}