bio_seq/codec/degenerate/
ws.rs

1use crate::ComplementMut;
2use crate::codec::Codec;
3
/// Degenerate 1-bit nucleotide alphabet that records only bond strength:
/// **W**eak (`A`/`T`) or **S**trong (`G`/`C`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(u8)]
pub enum WS {
    /// Weak bond (`A` or `T`), stored as bit `0`.
    W = 0,
    /// Strong bond (`G` or `C`), stored as bit `1`.
    S = 1,
}
11
12impl Codec for WS {
13    const BITS: u8 = 1;
14
15    /// Transmute a `u8` into a degenerate 1-bit nucleotide
16    ///
17    /// SAFETY: This only looks at the lower 2 bits of the `u8`
18    fn unsafe_from_bits(b: u8) -> Self {
19        debug_assert!(b < 2);
20        unsafe { std::mem::transmute(b & 0b1) }
21    }
22
23    /// Valid values are `0` and `1`
24    fn try_from_bits(b: u8) -> Option<Self> {
25        if b < 2 {
26            Some(unsafe { std::mem::transmute::<u8, WS>(b) })
27        } else {
28            None
29        }
30    }
31
32    /// TODO: fast translation of A, T, W to 0 and C, G, S to 1
33    fn unsafe_from_ascii(_b: u8) -> Self {
34        todo!()
35    }
36
37    fn try_from_ascii(c: u8) -> Option<Self> {
38        match c {
39            b'S' | b'C' | b'G' => Some(WS::S),
40            b'W' | b'A' | b'T' => Some(WS::W),
41            _ => None,
42        }
43    }
44
45    fn to_char(self) -> char {
46        match self {
47            WS::S => 'S',
48            WS::W => 'W',
49        }
50    }
51
52    fn to_bits(self) -> u8 {
53        self as u8
54    }
55
56    fn items() -> impl Iterator<Item = Self> {
57        vec![WS::S, WS::W].into_iter()
58    }
59}
60
61impl ComplementMut for WS {
62    /// This representation erases complements, so this is the identify function
63    fn comp(&mut self) {}
64}
65
#[cfg(test)]
mod tests {
    use crate::codec::degenerate;
    use crate::prelude::*;

    /// Symbols are their own complements, so the reverse complement of a
    /// weak/strong sequence is simply the sequence reversed.
    #[test]
    fn test_1bit() {
        let reversed: Seq<degenerate::WS> = Seq::<degenerate::WS>::from_str("SSSWWWSW")
            .unwrap()
            .to_revcomp();
        let rendered = String::from(reversed);
        assert_eq!("WSWWWSSS", rendered);
    }
}