// bio_seq/codec/text.rs
//! 8-bit ASCII representation of nucleotides
//!
//! This encoding interprets the bytes of a text literally as DNA bases.
use crate::codec::{dna, Codec, Complement};
use crate::error::ParseBioError;
/// A single nucleotide stored as its raw 8-bit ASCII byte.
///
/// This is a newtype over `u8`; `#[repr(transparent)]` guarantees it has the
/// same in-memory layout as the byte it wraps. Equality, ordering and hashing
/// are all derived and therefore follow the wrapped byte value.
#[repr(transparent)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Dna(u8);
impl Codec for Dna {
    /// Every symbol occupies one full byte.
    const BITS: u8 = 8;

    /// Wraps `b` directly; no validation is performed.
    fn unsafe_from_bits(b: u8) -> Self {
        Self(b)
    }

    /// Wraps `b` directly and always returns `Some`.
    ///
    /// NOTE(review): unlike `try_from_ascii`, this accepts every byte value,
    /// not only `ACGTN` — confirm that the asymmetry is intentional.
    fn try_from_bits(b: u8) -> Option<Self> {
        Some(Self(b))
    }

    /// Wraps `c` directly; no validation is performed.
    fn unsafe_from_ascii(c: u8) -> Self {
        Self(c)
    }

    /// Returns `Some` only for the upper-case bytes `A`, `C`, `G`, `T` and `N`;
    /// every other byte (including lower-case letters) yields `None`.
    fn try_from_ascii(c: u8) -> Option<Self> {
        matches!(c, b'A' | b'C' | b'G' | b'T' | b'N').then_some(Self(c))
    }

    /// Converts the stored byte to the `char` with the same code point.
    fn to_char(self) -> char {
        char::from(self.0)
    }

    /// Returns the stored byte unchanged.
    fn to_bits(self) -> u8 {
        self.0
    }

    /// Yields `A`, `C`, `G`, `T`, `N` in that order.
    fn items() -> impl Iterator<Item = Self> {
        // Iterate a static byte string rather than building a `Vec`, so that
        // enumerating the alphabet does not allocate.
        b"ACGTN".iter().copied().map(Self)
    }
}
//impl Eq for Dna {
// fn eq(self, rhs: Self) {
// true
// }
//}
impl Complement for Dna {
    /// Returns the complementary base: `A` <-> `T` and `C` <-> `G`.
    ///
    /// Any other stored byte (including `N`) maps to `N`.
    fn comp(&self) -> Self {
        let paired = match self.0 {
            b'A' => b'T',
            b'C' => b'G',
            b'G' => b'C',
            b'T' => b'A',
            _ => b'N',
        };
        Self(paired)
    }
}
impl From<Dna> for u8 {
fn from(val: Dna) -> Self {
val.0
}
}
impl From<dna::Dna> for Dna {
fn from(base: dna::Dna) -> Self {
match base {
dna::Dna::A => Dna(b'A'),
dna::Dna::C => Dna(b'C'),
dna::Dna::G => Dna(b'G'),
dna::Dna::T => Dna(b'T'),
}
}
}
impl TryFrom<Dna> for dna::Dna {
type Error = ParseBioError;
fn try_from(base: Dna) -> Result<Self, Self::Error> {
match base.0 {
b'A' => Ok(dna::Dna::A),
b'C' => Ok(dna::Dna::C),
b'G' => Ok(dna::Dna::G),
b'T' => Ok(dna::Dna::T),
// Todo: decide whether to support lower cases
/*
b'a' => Ok(dna::Dna::A),
b'c' => Ok(dna::Dna::C),
b'g' => Ok(dna::Dna::G),
b't' => Ok(dna::Dna::T),
*/
_ => Err(ParseBioError::UnrecognisedBase(base.0)),
}
}
}
/*
#[cfg(test)]
mod tests {
use crate::codec::text;
use crate::prelude::*;
#[test]
fn test_text_dna_encoding() {
let v: Vec<usize> = vec![b'A', b'a', b'a', b'c', b'C', b'c', b'G', b'g', b'T', b'a'];
let s: Seq<text::Dna> = v.into();
assert_eq!(s.nth(0), Dna::A.into());
assert_eq!(s.nth(4), Dna::C.into());
assert_eq!(s.nth(6), Dna::G.into());
assert_eq!(s.nth(8), Dna::T.into());
assert_eq!(Dna::A, s.nth(0).try_into().unwrap());
assert_eq!(Dna::C, s.nth(4).try_into().unwrap());
assert_eq!(Dna::G, s.nth(6).try_into().unwrap());
assert_eq!(Dna::T, s.nth(8).try_into().unwrap());
assert_ne!(s.nth(1), Dna::G.into());
assert_ne!(s.nth(3), Dna::C.into());
}
}
*/