use serde::{Deserialize, Serialize};
/// Converts raw sequence bytes into the typed symbols of one alphabet.
///
/// Every item is an associated function (none takes `self`), so an
/// implementor is used purely as a type-level tag, e.g. `DNA::encode(b'A')`.
pub trait AlphabetEncoding {
    /// Encoded form of a single sequence character.
    ///
    /// Bounded by `Copy` so symbols can be passed around by value.
    type Symbol: Copy;

    /// Translates one input byte into this alphabet's symbol type.
    ///
    /// The signature is infallible: implementors must return *some* symbol
    /// for every possible byte.
    fn encode(symbol: u8) -> Self::Symbol;

    /// Reports whether `symbol` is this alphabet's gap symbol.
    fn is_gap(symbol: Self::Symbol) -> bool;

    /// Number of distinct symbols in the alphabet.
    ///
    /// The implementations in this file return the variant count of their
    /// symbol enum, i.e. the unknown and gap symbols are included.
    fn n_symbols() -> usize;
}
/// Symbol set produced by `DNA::encode`: four bases, an unknown marker and a gap.
///
/// NOTE(review): the declaration order fixes the implicit discriminants
/// (`A` = 0 … `Gap` = 5); code outside this file may cast symbols to integer
/// indices — confirm before reordering or inserting variants.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum DnaSymbol {
/// Adenine (`A` / `a`).
A,
/// Cytosine (`C` / `c`).
C,
/// Guanine (`G` / `g`).
G,
/// Thymine (`T` / `t`); `DNA::encode` also maps `U` / `u` to this variant.
T,
/// Unknown base: `DNA::encode` returns this for `N` / `n`, for the IUPAC
/// ambiguity letters, and for every byte it does not otherwise recognise.
N,
/// Gap character (`-`).
Gap,
}
/// Zero-sized tag type selecting the nucleotide alphabet ([`DnaSymbol`]).
pub struct DNA;

impl AlphabetEncoding for DNA {
    type Symbol = DnaSymbol;

    /// Encodes one nucleotide byte, ignoring ASCII case.
    ///
    /// `A`, `C`, `G`, `T` map to their own variants, `U` is folded into
    /// [`DnaSymbol::T`], and `-` becomes [`DnaSymbol::Gap`]. Everything else —
    /// `N`, the IUPAC ambiguity letters (`R Y S W K M B D H V`) and any
    /// unrecognised byte — yields [`DnaSymbol::N`].
    fn encode(symbol: u8) -> Self::Symbol {
        // Fold case once so each base needs a single pattern; only ASCII
        // letters are affected, so `-` and non-letter bytes pass through.
        match symbol.to_ascii_uppercase() {
            b'A' => DnaSymbol::A,
            b'C' => DnaSymbol::C,
            b'G' => DnaSymbol::G,
            // Uracil shares the thymine symbol.
            b'T' | b'U' => DnaSymbol::T,
            b'-' => DnaSymbol::Gap,
            // `N`, ambiguity codes and all remaining bytes are "unknown".
            _ => DnaSymbol::N,
        }
    }

    /// Returns 6: the four bases plus `N` and `Gap`.
    fn n_symbols() -> usize {
        6
    }

    /// True only for [`DnaSymbol::Gap`].
    fn is_gap(symbol: DnaSymbol) -> bool {
        matches!(symbol, DnaSymbol::Gap)
    }
}
/// Symbol set produced by `Protein::encode`: the 20 standard amino acids
/// (one-letter codes), an unknown marker and a gap.
///
/// NOTE(review): the declaration order fixes the implicit discriminants
/// (`A` = 0 … `Gap` = 21); code outside this file may cast symbols to integer
/// indices — confirm before reordering or inserting variants.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ProteinSymbol {
/// Alanine.
A,
/// Arginine.
R,
/// Asparagine.
N,
/// Aspartic acid.
D,
/// Cysteine.
C,
/// Glutamine.
Q,
/// Glutamic acid.
E,
/// Glycine.
G,
/// Histidine.
H,
/// Isoleucine.
I,
/// Leucine.
L,
/// Lysine.
K,
/// Methionine.
M,
/// Phenylalanine.
F,
/// Proline.
P,
/// Serine.
S,
/// Threonine.
T,
/// Tryptophan.
W,
/// Tyrosine.
Y,
/// Valine.
V,
/// Unknown residue: `Protein::encode` returns this for `X` and for every
/// byte it does not recognise.
X,
/// Gap character (`-`).
Gap,
}
/// Zero-sized tag type selecting the amino-acid alphabet ([`ProteinSymbol`]).
pub struct Protein;

impl AlphabetEncoding for Protein {
    type Symbol = ProteinSymbol;

    /// Encodes one amino-acid byte, ignoring ASCII case.
    ///
    /// The 20 standard one-letter codes map to their own variants, `-` maps
    /// to [`ProteinSymbol::Gap`], and `X` as well as every unrecognised byte
    /// (e.g. `B`, `Z`, `*`, digits) maps to [`ProteinSymbol::X`].
    ///
    /// Lowercase residues are accepted so that this encoder is consistent with
    /// `DNA::encode`; previously they were silently turned into `X`.
    fn encode(symbol: u8) -> Self::Symbol {
        // Only ASCII letters are affected by the fold; `-` and all non-letter
        // bytes reach the match unchanged.
        match symbol.to_ascii_uppercase() {
            b'A' => ProteinSymbol::A,
            b'R' => ProteinSymbol::R,
            b'N' => ProteinSymbol::N,
            b'D' => ProteinSymbol::D,
            b'C' => ProteinSymbol::C,
            b'Q' => ProteinSymbol::Q,
            b'E' => ProteinSymbol::E,
            b'G' => ProteinSymbol::G,
            b'H' => ProteinSymbol::H,
            b'I' => ProteinSymbol::I,
            b'L' => ProteinSymbol::L,
            b'K' => ProteinSymbol::K,
            b'M' => ProteinSymbol::M,
            b'F' => ProteinSymbol::F,
            b'P' => ProteinSymbol::P,
            b'S' => ProteinSymbol::S,
            b'T' => ProteinSymbol::T,
            b'W' => ProteinSymbol::W,
            b'Y' => ProteinSymbol::Y,
            b'V' => ProteinSymbol::V,
            b'-' => ProteinSymbol::Gap,
            // Explicit `X` and anything unrecognised are both "unknown".
            _ => ProteinSymbol::X,
        }
    }

    /// Returns 22: the 20 standard amino acids plus `X` and `Gap`.
    fn n_symbols() -> usize {
        22
    }

    /// True only for [`ProteinSymbol::Gap`].
    fn is_gap(symbol: ProteinSymbol) -> bool {
        symbol == ProteinSymbol::Gap
    }
}
// Value-level choice between the two alphabets defined in this file.
//
// The derives make it serde-(de)serialisable and export a TypeScript
// definition to `../../wasm/types/lib_types.ts` via ts-rs; with the `cli`
// feature enabled it is also usable as a clap `ValueEnum`.
//
// NOTE(review): plain `//` comments are used on purpose — doc comments on
// this item or its variants may be picked up by the clap / ts-rs derives
// (help text, generated TS docs). Confirm before converting them to `///`.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, ts_rs::TS)]
#[ts(export, export_to = "../../wasm/types/lib_types.ts")]
#[cfg_attr(feature = "cli", derive(clap::ValueEnum))]
pub enum Alphabet {
// Nucleotide alphabet; presumably the counterpart of the `DNA` marker type.
DNA,
// Amino-acid alphabet; presumably the counterpart of the `Protein` marker type.
Protein,
}
/// Test-only helper: `dna!("ACGT")` encodes every byte of the string
/// expression with `DNA::encode` and collects the result into a
/// `Vec<DnaSymbol>`.
///
/// The expansion names the trait explicitly, so call sites do not need to
/// import `AlphabetEncoding` themselves for the macro to compile.
#[cfg(test)]
macro_rules! dna {
    ($s:expr) => {
        $s.bytes()
            // Fully-qualified path: a plain `DNA::encode` only resolves when
            // the trait happens to be in scope where the macro is invoked.
            .map(<$crate::alphabet::DNA as $crate::alphabet::AlphabetEncoding>::encode)
            .collect::<Vec<$crate::alphabet::DnaSymbol>>()
    };
}
#[cfg(test)]
pub(crate) use dna;
/// Test-only helper: `protein!("ARND")` encodes every byte of the string
/// expression with `Protein::encode` and collects the result into a
/// `Vec<ProteinSymbol>`.
///
/// The expansion names the trait explicitly, so call sites do not need to
/// import `AlphabetEncoding` themselves for the macro to compile.
#[cfg(test)]
macro_rules! protein {
    ($s:expr) => {
        $s.bytes()
            // Fully-qualified path: a plain `Protein::encode` only resolves
            // when the trait happens to be in scope where the macro is invoked.
            .map(<$crate::alphabet::Protein as $crate::alphabet::AlphabetEncoding>::encode)
            .collect::<Vec<$crate::alphabet::ProteinSymbol>>()
    };
}
#[cfg(test)]
pub(crate) use protein;
// Unit tests for the byte encoders, the remaining trait functions
// (`n_symbols`, `is_gap`) and the test-only `dna!` / `protein!` macros.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_dna_encoding() {
        assert_eq!(DNA::encode(b'A'), DnaSymbol::A);
        assert_eq!(DNA::encode(b'c'), DnaSymbol::C);
        assert_eq!(DNA::encode(b'G'), DnaSymbol::G);
        assert_eq!(DNA::encode(b't'), DnaSymbol::T);
        assert_eq!(DNA::encode(b'N'), DnaSymbol::N);
        assert_eq!(DNA::encode(b'-'), DnaSymbol::Gap);
        // Bytes outside the alphabet fall back to the unknown symbol.
        assert_eq!(DNA::encode(b'X'), DnaSymbol::N);
    }

    #[test]
    fn test_dna_encoding_uracil_maps_to_t() {
        assert_eq!(DNA::encode(b'U'), DnaSymbol::T);
        assert_eq!(DNA::encode(b'u'), DnaSymbol::T);
    }

    #[test]
    fn test_dna_encoding_ambiguity_codes_map_to_n() {
        for &code in b"RYSWKMBDHVryswkmbdhv" {
            assert_eq!(
                DNA::encode(code),
                DnaSymbol::N,
                "ambiguity code {:?}",
                code as char
            );
        }
    }

    #[test]
    fn test_dna_n_symbols_and_is_gap() {
        assert_eq!(DNA::n_symbols(), 6);
        assert!(DNA::is_gap(DnaSymbol::Gap));
        assert!(!DNA::is_gap(DnaSymbol::N));
        assert!(!DNA::is_gap(DnaSymbol::A));
    }

    #[test]
    fn test_protein_encoding() {
        assert_eq!(Protein::encode(b'A'), ProteinSymbol::A);
        assert_eq!(Protein::encode(b'R'), ProteinSymbol::R);
        assert_eq!(Protein::encode(b'N'), ProteinSymbol::N);
        assert_eq!(Protein::encode(b'D'), ProteinSymbol::D);
        assert_eq!(Protein::encode(b'C'), ProteinSymbol::C);
        assert_eq!(Protein::encode(b'X'), ProteinSymbol::X);
        assert_eq!(Protein::encode(b'-'), ProteinSymbol::Gap);
        // Bytes outside the alphabet fall back to the unknown symbol.
        assert_eq!(Protein::encode(b'Z'), ProteinSymbol::X);
    }

    #[test]
    fn test_protein_n_symbols_and_is_gap() {
        assert_eq!(Protein::n_symbols(), 22);
        assert!(Protein::is_gap(ProteinSymbol::Gap));
        assert!(!Protein::is_gap(ProteinSymbol::X));
        assert!(!Protein::is_gap(ProteinSymbol::A));
    }

    #[test]
    fn test_dna_macro_produces_correct_symbols() {
        assert_eq!(
            dna!("ACGT"),
            vec![DnaSymbol::A, DnaSymbol::C, DnaSymbol::G, DnaSymbol::T]
        );
    }

    #[test]
    fn test_dna_macro_length() {
        assert_eq!(dna!("ACGTN-").len(), 6);
    }

    #[test]
    fn test_dna_macro_gap_and_n() {
        assert_eq!(dna!("-N"), vec![DnaSymbol::Gap, DnaSymbol::N]);
    }

    #[test]
    fn test_dna_macro_lowercase() {
        assert_eq!(
            dna!("acgt"),
            vec![DnaSymbol::A, DnaSymbol::C, DnaSymbol::G, DnaSymbol::T]
        );
    }

    #[test]
    fn test_dna_macro_empty() {
        assert_eq!(dna!(""), Vec::<DnaSymbol>::new());
    }

    #[test]
    fn test_protein_macro_produces_correct_symbols() {
        assert_eq!(
            protein!("ARND"),
            vec![
                ProteinSymbol::A,
                ProteinSymbol::R,
                ProteinSymbol::N,
                ProteinSymbol::D
            ]
        );
    }

    #[test]
    fn test_protein_macro_length() {
        assert_eq!(protein!("ACDEFGHIKLMNPQRSTVWY").len(), 20);
    }

    #[test]
    fn test_protein_macro_gap_and_unknown() {
        assert_eq!(protein!("-Z"), vec![ProteinSymbol::Gap, ProteinSymbol::X]);
    }

    #[test]
    fn test_protein_macro_empty() {
        assert_eq!(protein!(""), Vec::<ProteinSymbol>::new());
    }
}