1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
//! Coding/Decoding trait for bit-packable enums representing biological alphabets
//!
//! The [dna], [iupac], [text], and [amino] alphabets are built in.
//!
//! This trait implements the translation between the UTF-8 representation of an alphabet and it's efficient bit-packing.
//! The `BITS` attribute stores the number of bits used by the representation.
//! ```
//! use bio_seq::prelude::{Dna, Codec};
//! use bio_seq::codec::text;
//! assert_eq!(Dna::BITS, 2);
//! assert_eq!(text::Dna::BITS, 8);
//! ```
//!
//! ## Deriving custom Codecs
//!
//! Custom encodings can be easily defined on enums using the derivable `Codec` trait.
//!
//! ```ignore
//! use bio_seq::prelude;
//! use bio_seq::prelude::Codec;
//!
//! #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Codec)]
//! pub enum Dna {
//!     A = 0b00,
//!     C = 0b01,
//!     G = 0b10,
//!     T = 0b11,
//! }
//! ```
use core::hash::Hash;

#[macro_use]
pub mod amino;
#[macro_use]
pub mod dna;
#[macro_use]
pub mod iupac;

pub mod text;

pub use bio_seq_derive::Codec;

/// The binary encodings of an alphabet's characters are represented with `u8`s. Encoding from UTF-8 or
/// a raw `u8` will always be fallible but often can be assumed safe.
pub trait Codec: Copy + Clone + Into<u8> + PartialEq + Hash + Eq {
    const BITS: usize;
    type Error: std::error::Error + core::fmt::Display;

    fn unsafe_from_bits(b: u8) -> Self;
    fn try_from_bits(b: u8) -> Result<Self, Self::Error>;
    fn from_char(c: char) -> Result<Self, Self::Error>;
    fn to_char(self) -> char;
}

/// Nucleotide alphabets that can be complemented implement `Complement`
///
/// ```
/// use bio_seq::prelude::{Dna, Complement};
/// assert_eq!(Dna::A.comp(), Dna::T);
/// ````
pub trait Complement {
    fn comp(self) -> Self;
}

#[cfg(test)]
mod tests {
    use super::dna::Dna;
    use super::iupac::Iupac;

    #[test]
    fn dna_to_iupac() {
        assert_eq!(Iupac::from(Dna::A), Iupac::A);
        assert_eq!(Iupac::from(Dna::C), Iupac::C);
        assert_eq!(Iupac::from(Dna::G), Iupac::G);
        assert_eq!(Iupac::from(Dna::T), Iupac::T);

        assert_ne!(Iupac::from(Dna::A), Iupac::T);
        assert_ne!(Iupac::from(Dna::T), Iupac::A);
        assert_ne!(Iupac::from(Dna::C), Iupac::T);
        assert_ne!(Iupac::from(Dna::G), Iupac::T);
    }
}