bio_seq/codec/masked/
dna.rs

1use crate::codec::Codec;
2//use crate::{Complement, Maskable, Reverse, ReverseComplement};
3use crate::{Complement, ComplementMut, MaskableMut}; //, ReverseComplementMut, ReverseMut};
4
/// **Experimental** 4-bit nucleotide encoding with fast reverse complement and toggled mask operation
///
/// Note that masking/unmasking are not idempotent
///
/// The bit patterns are chosen so that both operations implemented in this
/// file are pure bit manipulation on the low four bits:
///
/// * complementing a base reverses the order of its four bits, and
/// * masking/unmasking inverts all four bits (a toggle, hence the note above).
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Codec)]
#[bits(4)]
#[repr(u8)]
pub enum Dna {
    // Unmasked bases are one-hot. Reversing the bits pairs A with T and C with G.
    A = 0b1000,
    C = 0b0100,
    G = 0b0010,
    T = 0b0001,

    // Each masked base is the 4-bit inverse of its unmasked counterpart
    // (e.g. A = 0b1000, AMasked = 0b0111).
    #[display('a')]
    AMasked = 0b0111,
    #[display('c')]
    CMasked = 0b1011,
    #[display('g')]
    GMasked = 0b1101,
    #[display('t')]
    TMasked = 0b1110,

    // N and its masked form are bit palindromes, so reversing the bits leaves
    // them unchanged; inverting the bits swaps one for the other.
    N = 0b0000,
    #[display('n')]
    NMasked = 0b1111,

    // Reversing or inverting 0b1100 both give 0b0011, which is the pattern
    // named in `#[alt]`. NOTE(review): presumably `alt` makes that pattern
    // decode to this same variant - confirm against the `Codec` derive.
    #[display('-')]
    #[alt(0b0011)]
    Gap = 0b1100,

    // Same arrangement as `Gap`: reversing or inverting 0b1010 gives 0b0101.
    #[display('.')]
    #[alt(0b0101)]
    Pad = 0b1010,

    // The two remaining patterns are bit palindromes (unchanged by reversal)
    // and are 4-bit inverses of each other, so inverting the bits turns
    // `Unknown1` into `Unknown2` and vice versa.
    #[display('?')]
    Unknown1 = 0b0110,

    #[display('!')]
    Unknown2 = 0b1001,
}
44
45impl ComplementMut for Dna {
46    /// This representation can be complemented by reversing the bit pattern
47    fn comp(&mut self) {
48        let bits = self.to_bits();
49        *self = Self::unsafe_from_bits(
50            ((bits & 0b1000) >> 3)
51                | ((bits & 0b0100) >> 1)
52                | ((bits & 0b0010) << 1)
53                | ((bits & 0b0001) << 3),
54        );
55    }
56}
57
// Empty impl: nothing is overridden here, so every `Complement` method used on
// `Dna` comes from the trait's provided defaults.
// NOTE(review): presumably those defaults are built on `ComplementMut::comp`
// from the impl above - confirm against the trait definition.
impl Complement for Dna {}
59
60impl MaskableMut for Dna {
61    /// Inverting the bit pattern masks/unmasks this representation
62    fn mask(&mut self) {
63        let b = *self as u8 ^ 0b1111;
64        *self = Dna::unsafe_from_bits(b);
65    }
66
67    fn unmask(&mut self) {
68        let b = *self as u8 ^ 0b1111;
69        *self = Dna::unsafe_from_bits(b);
70    }
71}
72
#[cfg(test)]
mod tests {
    use crate::codec::masked;
    use crate::prelude::*;

    /// Input shared by the sequence-level tests: upper-case and lower-case
    /// bases together with pad (`.`), `N` and gap (`-`) symbols.
    const MIXED_CASE: &str = "A.TCGCgtcataN--A";

    /// Complementing single unmasked bases pairs A with T and C with G.
    #[test]
    fn iupac_comp() {
        let a = masked::Dna::A;
        let t = masked::Dna::T;
        let c = masked::Dna::C;
        let g = masked::Dna::G;

        assert_eq!(t.to_comp(), a);
        assert_eq!(c.to_comp(), g);

        assert_ne!(t.to_comp(), c);
        assert_ne!(c.to_comp(), c);

        // Complementing twice returns the original base.
        assert_eq!(c.to_comp(), g.to_comp().to_comp());
    }

    /// Masking a sequence toggles the case of every base, including `N`.
    #[test]
    fn mask_sequence() {
        let seq = Seq::<masked::Dna>::try_from(MIXED_CASE).unwrap();
        let masked_text = seq.to_mask().to_string();

        assert_ne!(masked_text, "a.tcgcGTGATAN--a");
        assert_eq!(masked_text, "a.tcgcGTCATAn--a");
    }

    /// Complementing keeps each symbol's case and position.
    #[test]
    fn masked_comp() {
        let seq = Seq::<masked::Dna>::try_from(MIXED_CASE).unwrap();

        assert_eq!(seq.to_comp().to_string(), "T.AGCGcagtatN--T");
    }

    /// Reverse complement is the complement read back to front.
    #[test]
    fn masked_revcomp() {
        let seq = Seq::<masked::Dna>::try_from(MIXED_CASE).unwrap();
        let revcomp_text = seq.to_revcomp().to_string();

        assert_ne!(revcomp_text, "T--NtaagacGCGA.T");
        assert_eq!(revcomp_text, "T--NtatgacGCGA.T");
    }
}