bio_seq/codec/
text.rs

1//! 8-bit ASCII representation of nucleotides
2//!
3//! This encoding is a literal interpretation of bytes of text as DNA
4use crate::codec::{Codec, dna};
5use crate::error::ParseBioError;
6//use crate::{Complement, Reverse, ReverseComplement};
7
/// A single nucleotide held as its raw 8-bit ASCII byte.
///
/// The wrapper is `#[repr(transparent)]`, so it has the same layout as the
/// `u8` it contains. Equality, ordering and hashing are all derived from
/// that byte.
#[repr(transparent)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Dna(u8);
11
12impl Codec for Dna {
13    const BITS: u8 = 8;
14
15    fn unsafe_from_bits(b: u8) -> Self {
16        Self(b)
17    }
18
19    fn try_from_bits(b: u8) -> Option<Self> {
20        Some(Self(b))
21    }
22
23    fn unsafe_from_ascii(c: u8) -> Self {
24        Self(c)
25    }
26
27    fn try_from_ascii(c: u8) -> Option<Self> {
28        //        if c.is_ascii_alphanumeric() {
29        match c {
30            b'A' | b'C' | b'G' | b'T' | b'N' => Some(Self(c)),
31            _ => None,
32        }
33    }
34
35    fn to_char(self) -> char {
36        self.0.into()
37    }
38
39    fn to_bits(self) -> u8 {
40        self.0
41    }
42
43    fn items() -> impl Iterator<Item = Self> {
44        vec![Dna(b'A'), Dna(b'C'), Dna(b'G'), Dna(b'T'), Dna(b'N')].into_iter()
45    }
46}
47
48//impl Eq for Dna {
49//    fn eq(self, rhs: Self) {
50//        todo!()
51//    }
52//}
53
54/*
55impl Complement for Dna {
56    type Output = Self;
57
58    fn comp(&mut self) {
59        todo!()
60    }
61
62    fn to_comp(&self) -> Self {
63        match self {
64            Self(b'A') => Self(b'T'),
65            Self(b'C') => Self(b'G'),
66            Self(b'G') => Self(b'C'),
67            Self(b'T') => Self(b'A'),
68            _ => Self(b'N'),
69        }
70    }
71}
72    */
73
74impl From<Dna> for u8 {
75    fn from(val: Dna) -> Self {
76        val.0
77    }
78}
79
80impl From<dna::Dna> for Dna {
81    fn from(base: dna::Dna) -> Self {
82        match base {
83            dna::Dna::A => Dna(b'A'),
84            dna::Dna::C => Dna(b'C'),
85            dna::Dna::G => Dna(b'G'),
86            dna::Dna::T => Dna(b'T'),
87        }
88    }
89}
90
91impl TryFrom<Dna> for dna::Dna {
92    type Error = ParseBioError;
93
94    fn try_from(base: Dna) -> Result<Self, Self::Error> {
95        match base.0 {
96            b'A' => Ok(dna::Dna::A),
97            b'C' => Ok(dna::Dna::C),
98            b'G' => Ok(dna::Dna::G),
99            b'T' => Ok(dna::Dna::T),
100            // Todo: decide whether to support lower cases
101            /*
102            b'a' => Ok(dna::Dna::A),
103            b'c' => Ok(dna::Dna::C),
104            b'g' => Ok(dna::Dna::G),
105            b't' => Ok(dna::Dna::T),
106            */
107            _ => Err(ParseBioError::UnrecognisedBase(base.0)),
108        }
109    }
110}
111
#[cfg(test)]
mod tests {
    use crate::codec::text;
    use crate::prelude::*;

    /// Parses a string into a text-encoded sequence and checks individual
    /// positions against the prelude's `Dna` bases, converting in both
    /// directions.
    #[test]
    fn test_text_dna_encoding() {
        let s: Seq<text::Dna> = "CATCGCGACTGATCACTCGATC".try_into().unwrap();
        println!("{s}");

        // Positions whose base is known from the literal above.
        for (idx, base) in [(0, Dna::C), (4, Dna::G), (6, Dna::G), (8, Dna::C)] {
            // Text symbol -> prelude base.
            assert_eq!(base, s.nth(idx).try_into().unwrap());
            // Prelude base -> text symbol.
            assert_eq!(s.nth(idx), base.into());
        }

        // Positions that must NOT match the given base.
        for (idx, base) in [(1, Dna::T), (3, Dna::A)] {
            assert_ne!(s.nth(idx), base.into());
        }
    }
}