bio_seq/codec/
dna.rs

1//! 2-bit DNA representation: `A: 00, C: 01, G: 10, T: 11`
2
3use crate::codec::Codec;
4//use crate::kmer::Kmer;
5//use crate::seq::{Seq, SeqArray, SeqSlice};
6use crate::{Complement, ComplementMut};
7
/// A single DNA nucleotide stored as a 2-bit code inside a `u8`.
///
/// The discriminant of each variant is its bit pattern, so `Dna::X as u8`
/// yields the encoded value directly and the derived ordering is
/// `A < C < G < T`.
#[repr(u8)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Dna {
    /// Encoded as `0b00`.
    A = 0,
    /// Encoded as `0b01`.
    C = 1,
    /// Encoded as `0b10`.
    G = 2,
    /// Encoded as `0b11`.
    T = 3,
}
16
17impl Codec for Dna {
18    const BITS: u8 = 2;
19
20    /// Transmute a `u8` into a nucleotide
21    ///
22    /// SAFETY: This only looks at the lower 2 bits of the `u8`
23    fn unsafe_from_bits(b: u8) -> Self {
24        debug_assert!(b < 4);
25        unsafe { std::mem::transmute(b & 0b11) }
26    }
27
28    /// We can verify that a byte is a valid `Dna` value if it's
29    /// between 0 and 3.
30    fn try_from_bits(b: u8) -> Option<Self> {
31        if b < 4 {
32            Some(unsafe { std::mem::transmute::<u8, Dna>(b) })
33        } else {
34            None
35        }
36    }
37
38    /// The ASCII values of 'A', 'C', 'G', and 'T' can be translated into
39    /// the numbers 0, 1, 2, and 3 using bitwise operations: `((b << 1) + b) >> 3`.
40    /// In other words, multiply the ASCII value by 3 and shift right.
41    fn unsafe_from_ascii(b: u8) -> Self {
42        // TODO: benchmark against b * 3
43        Dna::unsafe_from_bits(((b << 1) + b) >> 3)
44    }
45
46    fn try_from_ascii(c: u8) -> Option<Self> {
47        match c {
48            b'A' => Some(Dna::A),
49            b'C' => Some(Dna::C),
50            b'G' => Some(Dna::G),
51            b'T' => Some(Dna::T),
52            _ => None,
53        }
54    }
55
56    fn to_char(self) -> char {
57        match self {
58            Dna::A => 'A',
59            Dna::C => 'C',
60            Dna::G => 'G',
61            Dna::T => 'T',
62        }
63    }
64
65    fn to_bits(self) -> u8 {
66        self as u8
67    }
68
69    fn items() -> impl Iterator<Item = Self> {
70        vec![Dna::A, Dna::C, Dna::G, Dna::T].into_iter()
71    }
72}
73
74/// This 2-bit representation of nucleotides lends itself to a very fast
75/// complement implementation with bitwise xor
76impl ComplementMut for Dna {
77    fn comp(&mut self) {
78        *self = Dna::unsafe_from_bits(*self as u8 ^ 0b11);
79    }
80}
81
82impl Complement for Dna {}
83
84/*
85impl ComplementMut for Seq<Dna> {
86    fn comp(&mut self) {
87        for word in self.bv.as_raw_mut_slice() {
88            *word ^= usize::MAX;
89        }
90    }
91}
92*/
93
94/*
95impl ReverseMut for Seq<Dna> {
96    fn rev(&mut self) {
97        self.bv.reverse();
98        for word in self.bv.as_raw_mut_slice().iter_mut() {
99            let c: usize = *word;
100            let odds = (c & 0x5555_5555_5555_5555usize) >> 1;
101            *word = (c & 0xAAAA_AAAA_AAAA_AAAAusize) << 1 | odds;
102        }
103    }
104}
105*/
106
#[cfg(test)]
mod tests {
    use crate::prelude::*;

    /// K-mers built from the same sequence compare equal; k-mers built from
    /// sequences that differ in one base do not.
    #[test]
    fn dna_kmer_equality() {
        let first = Kmer::<Dna, 8>::try_from(dna!("TGCACATG")).unwrap();
        let second = Kmer::<Dna, 8>::try_from(dna!("TGCACATG")).unwrap();
        assert_eq!(first, second);

        let reference = Kmer::<Dna, 7>::try_from(dna!("GTGACGA")).unwrap();
        let altered = Kmer::<Dna, 7>::try_from(dna!("GTGAAGA")).unwrap();
        assert_ne!(reference, altered);
    }

    /// `kmer!` agrees with converting the corresponding `dna!` sequence.
    #[test]
    fn dna_kmer_macro() {
        let expected = Kmer::<Dna, 8>::try_from(dna!("TGCACATG")).unwrap();
        assert_eq!(kmer!("TGCACATG"), expected);

        let altered = Kmer::<Dna, 7>::try_from(dna!("GTGAAGA")).unwrap();
        assert_ne!(kmer!("GTGACGA"), altered);
    }

    /*
    #[test]
    fn dna_kmer_complement() {
        assert_eq!(
            format!(
                "{:b}",
                Kmer::<Dna, 8>::try_from(dna!("AAAAAAAA"))
                    .unwrap()
                    .comp()
                    .bs
            ),
            format!(
                "{:b}",
                Kmer::<Dna, 8>::try_from(dna!("TTTTTTTT")).unwrap().bs
            )
        );

        assert_eq!(
            Kmer::<Dna, 1>::try_from(dna!("C")).unwrap().comp(),
            Kmer::<Dna, 1>::try_from(dna!("G")).unwrap()
        );

        assert_eq!(
            Kmer::<Dna, 16>::from(dna!("AAAATGCACATGTTTT")).comp(),
            Kmer::<Dna, 16>::from(dna!("TTTTACGTGTACAAAA"))
        );
    }
    */
}