use std::cmp::Ordering;
use std::fmt::{Display, Formatter};
use std::hash::Hash;
use std::ops::{BitOr, BitOrAssign};
use std::str::FromStr;
use crate::error::ParseSymbolError;
use crate::translation::GeneticCode;
use crate::{AmbiAmino, Amino, Seq, Symbol};
/// A concrete (unambiguous) nucleotide: exactly one of `A`, `C`, `G` or `T`.
///
/// Every variant owns a distinct bit of the `u8` representation, so sets of
/// nucleotides can be expressed as the bitwise OR of their discriminants
/// (this is how [`AmbiNuc`] is encoded).
///
/// The derived ordering follows the discriminants: `A < C < G < T`.
#[derive(Clone, Copy, Default, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(u8)]
pub enum Nuc {
    /// Bit 0. This is also the [`Default`] value.
    #[default]
    A = 1 << 0,
    /// Bit 1.
    C = 1 << 1,
    /// Bit 2.
    G = 1 << 2,
    /// Bit 3.
    T = 1 << 3,
}
impl Nuc {
    /// Every concrete nucleotide, in the order `A`, `C`, `G`, `T`.
    pub const ALL: [Self; 4] = Self::arr(b"ACGT");

    /// Returns the complementary nucleotide: `A` and `T` swap, as do `C` and `G`.
    #[must_use]
    pub const fn complement(self) -> Self {
        match self {
            Self::A => Self::T,
            Self::T => Self::A,
            Self::C => Self::G,
            Self::G => Self::C,
        }
    }

    /// Returns the nucleotide as a one-character, uppercase string.
    #[must_use]
    pub const fn to_str(self) -> &'static str {
        match self {
            Self::A => "A",
            Self::C => "C",
            Self::G => "G",
            Self::T => "T",
        }
    }

    /// Parses a single ASCII byte, accepting both upper- and lowercase letters.
    ///
    /// # Errors
    ///
    /// Returns a [`ParseSymbolError`] for any byte other than
    /// `A`/`C`/`G`/`T` in either case.
    pub const fn from_ascii(ascii: u8) -> Result<Self, ParseSymbolError> {
        match ascii {
            b'A' | b'a' => Ok(Self::A),
            b'C' | b'c' => Ok(Self::C),
            b'G' | b'g' => Ok(Self::G),
            b'T' | b't' => Ok(Self::T),
            _ => Err(ParseSymbolError::new::<Self>()),
        }
    }

    /// Returns the uppercase ASCII byte for this nucleotide.
    #[must_use]
    pub const fn to_ascii(self) -> u8 {
        // `to_str` always yields exactly one ASCII character.
        let text = self.to_str().as_bytes();
        text[0]
    }

    /// Converts a byte-string literal such as `b"ACGT"` into an array of nucleotides.
    ///
    /// # Panics
    ///
    /// Panics if any byte is rejected by [`Nuc::from_ascii`]. When evaluated
    /// in a const context the panic is reported at compile time.
    #[must_use]
    #[track_caller]
    pub const fn arr<const N: usize>(literal: &[u8; N]) -> [Nuc; N] {
        // The fill value is a placeholder; every slot is overwritten below.
        let mut out = [Self::A; N];
        // Iterators are not usable in `const fn`, hence the manual index loop.
        let mut idx = 0;
        while idx < N {
            out[idx] = match Self::from_ascii(literal[idx]) {
                Ok(nuc) => nuc,
                Err(_) => panic!("Invalid Nuc in literal"),
            };
            idx += 1;
        }
        out
    }

    /// Like [`Nuc::arr`], but wraps the resulting array in a [`Seq`].
    ///
    /// # Panics
    ///
    /// Panics under the same conditions as [`Nuc::arr`].
    #[must_use]
    #[track_caller]
    pub const fn seq<const N: usize>(literal: &[u8; N]) -> Seq<[Nuc; N]> {
        let nucs = Self::arr(literal);
        Seq(nucs)
    }
}
/// Formats the nucleotide as its single uppercase letter.
impl Display for Nuc {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        // Delegate to `str`'s `Display` so width/padding flags keep working.
        Display::fmt(self.to_str(), f)
    }
}
/// Parses a string that consists of exactly one nucleotide letter.
impl FromStr for Nuc {
    type Err = ParseSymbolError;

    /// Succeeds only when `s` is exactly one byte long and that byte is
    /// accepted by [`Nuc::from_ascii`]; any other input is an error.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        if let &[byte] = s.as_bytes() {
            Self::from_ascii(byte)
        } else {
            Err(ParseSymbolError::new::<Self>())
        }
    }
}
/// Narrows an [`AmbiNuc`] to a [`Nuc`] when it denotes a single nucleotide.
impl TryFrom<AmbiNuc> for Nuc {
    /// The rejected input is handed back unchanged.
    type Error = AmbiNuc;

    fn try_from(nuc: AmbiNuc) -> Result<Self, Self::Error> {
        match nuc {
            AmbiNuc::A => Ok(Self::A),
            AmbiNuc::C => Ok(Self::C),
            AmbiNuc::G => Ok(Self::G),
            AmbiNuc::T => Ok(Self::T),
            // Anything else stands for more than one nucleotide.
            ambiguous => Err(ambiguous),
        }
    }
}
/// Reflexive `AsRef`, so generic code bounded on `AsRef<Nuc>` also accepts a plain `Nuc`.
impl AsRef<Nuc> for Nuc {
    fn as_ref(&self) -> &Self {
        self
    }
}
/// Reflexive `AsMut`, so generic code bounded on `AsMut<Nuc>` also accepts a plain `Nuc`.
impl AsMut<Nuc> for Nuc {
    fn as_mut(&mut self) -> &mut Self {
        self
    }
}
/// A possibly ambiguous nucleotide: a non-empty set of [`Nuc`] values named
/// by a single-letter ambiguity code.
///
/// The discriminant of every variant is the bitwise OR of the [`Nuc`]
/// discriminants it may stand for, so the union of two values is the OR of
/// their bits. The [`Default`] value is [`AmbiNuc::N`] (any nucleotide).
#[derive(Clone, Copy, Default, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(u8)]
pub enum AmbiNuc {
    /// Exactly `A`.
    A = (Nuc::A as u8),
    /// Exactly `C`.
    C = (Nuc::C as u8),
    /// Exactly `G`.
    G = (Nuc::G as u8),
    /// Exactly `T`.
    T = (Nuc::T as u8),
    /// `A` or `G`.
    R = (Nuc::A as u8) | (Nuc::G as u8),
    /// `C` or `T`.
    Y = (Nuc::C as u8) | (Nuc::T as u8),
    /// `A` or `C`.
    M = (Nuc::A as u8) | (Nuc::C as u8),
    /// `G` or `T`.
    K = (Nuc::G as u8) | (Nuc::T as u8),
    /// `A` or `T`.
    W = (Nuc::A as u8) | (Nuc::T as u8),
    /// `C` or `G`.
    S = (Nuc::C as u8) | (Nuc::G as u8),
    /// `C`, `G` or `T` (anything but `A`).
    B = (Nuc::C as u8) | (Nuc::G as u8) | (Nuc::T as u8),
    /// `A`, `G` or `T` (anything but `C`).
    D = (Nuc::A as u8) | (Nuc::G as u8) | (Nuc::T as u8),
    /// `A`, `C` or `T` (anything but `G`).
    H = (Nuc::A as u8) | (Nuc::C as u8) | (Nuc::T as u8),
    /// `A`, `C` or `G` (anything but `T`).
    V = (Nuc::A as u8) | (Nuc::C as u8) | (Nuc::G as u8),
    /// Any of `A`, `C`, `G` or `T`.
    #[default]
    N = (Nuc::A as u8) | (Nuc::C as u8) | (Nuc::G as u8) | (Nuc::T as u8),
}
impl AmbiNuc {
pub const ALL: [Self; 15] = Self::arr(b"ACMGRSVTWYHKDBN");
#[must_use]
pub const fn complement(self) -> Self {
match self {
Self::A => Self::T,
Self::C => Self::G,
Self::G => Self::C,
Self::T => Self::A,
Self::R => Self::Y,
Self::Y => Self::R,
Self::M => Self::K,
Self::K => Self::M,
Self::W => Self::W,
Self::S => Self::S,
Self::B => Self::V,
Self::D => Self::H,
Self::H => Self::D,
Self::V => Self::B,
Self::N => Self::N,
}
}
#[must_use]
pub const fn to_str(self) -> &'static str {
match self {
Self::A => "A",
Self::C => "C",
Self::G => "G",
Self::T => "T",
Self::R => "R",
Self::Y => "Y",
Self::M => "M",
Self::K => "K",
Self::W => "W",
Self::S => "S",
Self::B => "B",
Self::D => "D",
Self::H => "H",
Self::V => "V",
Self::N => "N",
}
}
pub const fn from_ascii(ascii: u8) -> Result<Self, ParseSymbolError> {
Ok(match ascii {
b'a' | b'A' => Self::A,
b'c' | b'C' => Self::C,
b'g' | b'G' => Self::G,
b't' | b'T' => Self::T,
b'r' | b'R' => Self::R,
b'y' | b'Y' => Self::Y,
b'm' | b'M' => Self::M,
b'k' | b'K' => Self::K,
b'w' | b'W' => Self::W,
b's' | b'S' => Self::S,
b'b' | b'B' => Self::B,
b'd' | b'D' => Self::D,
b'h' | b'H' => Self::H,
b'v' | b'V' => Self::V,
b'n' | b'N' => Self::N,
_ => return Err(ParseSymbolError::new::<Self>()),
})
}
#[must_use]
pub const fn to_ascii(self) -> u8 {
self.to_str().as_bytes()[0]
}
pub fn iter(self) -> std::iter::Copied<std::slice::Iter<'static, Nuc>> {
self.expansions().iter().copied()
}
#[must_use]
pub const fn expansions(self) -> &'static [Nuc] {
match self {
Self::A => &[Nuc::A] as &[_],
Self::C => &[Nuc::C],
Self::G => &[Nuc::G],
Self::T => &[Nuc::T],
Self::R => &[Nuc::A, Nuc::G],
Self::Y => &[Nuc::C, Nuc::T],
Self::M => &[Nuc::A, Nuc::C],
Self::K => &[Nuc::G, Nuc::T],
Self::W => &[Nuc::A, Nuc::T],
Self::S => &[Nuc::C, Nuc::G],
Self::B => &[Nuc::C, Nuc::G, Nuc::T],
Self::D => &[Nuc::A, Nuc::G, Nuc::T],
Self::H => &[Nuc::A, Nuc::C, Nuc::T],
Self::V => &[Nuc::A, Nuc::C, Nuc::G],
Self::N => &[Nuc::A, Nuc::C, Nuc::G, Nuc::T],
}
}
#[must_use]
#[track_caller]
pub const fn arr<const N: usize>(literal: &[u8; N]) -> [AmbiNuc; N] {
let mut nucs = [Self::A; N];
let mut i = 0;
while i < literal.len() {
let Ok(nuc) = Self::from_ascii(literal[i]) else {
panic!("Invalid AmbiNuc in literal");
};
nucs[i] = nuc;
i += 1;
}
nucs
}
#[must_use]
#[track_caller]
pub const fn seq<const N: usize>(literal: &[u8; N]) -> Seq<[AmbiNuc; N]> {
Seq(Self::arr(literal))
}
pub(crate) fn from_u8(byte: u8) -> Option<Self> {
macro_rules! from_u8 {
($byte:expr, $($variant:ident)+) => {{
$(const $variant: u8 = AmbiNuc::$variant as u8;)+
match $byte {
$($variant => Some(AmbiNuc::$variant),)+
_ => None,
}
}};
}
from_u8!(byte, A C M G R S V T W Y H K D B N)
}
}
/// `a | b` is the code that covers every nucleotide covered by `a` or by `b`.
impl BitOr for AmbiNuc {
    type Output = Self;

    fn bitor(self, rhs: AmbiNuc) -> Self::Output {
        // Discriminants are bit sets, so set union is a plain bitwise OR.
        let union_bits = self as u8 | rhs as u8;
        Self::from_u8(union_bits).expect("BUG: invalid nucleotide encountered")
    }
}
/// By-reference form of `AmbiNuc | AmbiNuc`.
impl BitOr for &AmbiNuc {
    type Output = AmbiNuc;

    fn bitor(self, rhs: &AmbiNuc) -> Self::Output {
        let (lhs, rhs) = (*self, *rhs);
        lhs | rhs
    }
}
/// Adds a concrete nucleotide to the set represented by an ambiguity code.
impl BitOr<Nuc> for AmbiNuc {
    type Output = AmbiNuc;

    fn bitor(self, rhs: Nuc) -> Self::Output {
        let rhs = AmbiNuc::from(rhs);
        self | rhs
    }
}
/// By-reference form of `AmbiNuc | Nuc`.
impl BitOr<&Nuc> for &AmbiNuc {
    type Output = AmbiNuc;

    fn bitor(self, rhs: &Nuc) -> Self::Output {
        let (ambi, nuc) = (*self, *rhs);
        ambi | nuc
    }
}
/// Adds the set represented by an ambiguity code to a concrete nucleotide.
impl BitOr<AmbiNuc> for Nuc {
    type Output = AmbiNuc;

    fn bitor(self, rhs: AmbiNuc) -> Self::Output {
        let lhs = AmbiNuc::from(self);
        lhs | rhs
    }
}
/// By-reference form of `Nuc | AmbiNuc`.
impl BitOr<&AmbiNuc> for &Nuc {
    type Output = AmbiNuc;

    fn bitor(self, rhs: &AmbiNuc) -> Self::Output {
        let (nuc, ambi) = (*self, *rhs);
        nuc | ambi
    }
}
/// Combines two concrete nucleotides into the ambiguity code covering both.
impl BitOr for Nuc {
    type Output = AmbiNuc;

    fn bitor(self, rhs: Nuc) -> Self::Output {
        let lhs = AmbiNuc::from(self);
        lhs | rhs
    }
}
/// By-reference form of `Nuc | Nuc`.
impl BitOr for &Nuc {
    type Output = AmbiNuc;

    fn bitor(self, rhs: &Nuc) -> Self::Output {
        let (lhs, rhs) = (*self, *rhs);
        lhs | rhs
    }
}
/// In-place union with another ambiguity code.
impl BitOrAssign for AmbiNuc {
    fn bitor_assign(&mut self, rhs: AmbiNuc) {
        let merged = *self | rhs;
        *self = merged;
    }
}
/// In-place union with a borrowed ambiguity code.
impl BitOrAssign<&AmbiNuc> for AmbiNuc {
    fn bitor_assign(&mut self, rhs: &AmbiNuc) {
        *self = *self | *rhs;
    }
}
impl BitOrAssign<Nuc> for AmbiNuc {
fn bitor_assign(&mut self, rhs: Nuc) {
*self = *self | rhs;
}
}
/// In-place union with a borrowed concrete nucleotide.
impl BitOrAssign<&Nuc> for AmbiNuc {
    fn bitor_assign(&mut self, rhs: &Nuc) {
        *self = *self | *rhs;
    }
}
/// An ambiguity code equals a concrete nucleotide only when it is that
/// nucleotide's unambiguous counterpart.
impl PartialEq<Nuc> for AmbiNuc {
    fn eq(&self, other: &Nuc) -> bool {
        let lifted = AmbiNuc::from(*other);
        *self == lifted
    }
}
/// Mirror image of `AmbiNuc == Nuc`, so the comparison works in either order.
impl PartialEq<AmbiNuc> for Nuc {
    fn eq(&self, other: &AmbiNuc) -> bool {
        *other == *self
    }
}
/// Orders an ambiguity code against a concrete nucleotide by first lifting
/// the nucleotide to an [`AmbiNuc`]; the result is always `Some`.
impl PartialOrd<Nuc> for AmbiNuc {
    fn partial_cmp(&self, other: &Nuc) -> Option<Ordering> {
        let lifted = AmbiNuc::from(*other);
        Some(Ord::cmp(self, &lifted))
    }
}
/// Mirror image of `AmbiNuc`'s ordering against `Nuc`.
impl PartialOrd<AmbiNuc> for Nuc {
    fn partial_cmp(&self, other: &AmbiNuc) -> Option<Ordering> {
        // Compare from the other side, then flip the answer.
        let seen_from_other = other.partial_cmp(self)?;
        Some(seen_from_other.reverse())
    }
}
/// Formats the code as its single uppercase letter.
impl Display for AmbiNuc {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        // Delegate to `str`'s `Display` so width/padding flags keep working.
        Display::fmt(self.to_str(), f)
    }
}
/// Parses a string that consists of exactly one ambiguity-code letter.
impl FromStr for AmbiNuc {
    type Err = ParseSymbolError;

    /// Succeeds only when `s` is exactly one byte long and that byte is
    /// accepted by [`AmbiNuc::from_ascii`]; any other input is an error.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        if let &[byte] = s.as_bytes() {
            Self::from_ascii(byte)
        } else {
            Err(ParseSymbolError::new::<Self>())
        }
    }
}
/// Widens a concrete nucleotide to the ambiguity code of the same letter.
impl From<Nuc> for AmbiNuc {
    fn from(nuc: Nuc) -> Self {
        // Spelled out variant by variant so the conversion needs no `unsafe`.
        match nuc {
            Nuc::T => Self::T,
            Nuc::G => Self::G,
            Nuc::C => Self::C,
            Nuc::A => Self::A,
        }
    }
}
/// Reflexive `AsRef`, so generic code bounded on `AsRef<AmbiNuc>` also accepts a plain `AmbiNuc`.
impl AsRef<AmbiNuc> for AmbiNuc {
    fn as_ref(&self) -> &Self {
        self
    }
}
/// Reflexive `AsMut`, so generic code bounded on `AsMut<AmbiNuc>` also accepts a plain `AmbiNuc`.
impl AsMut<AmbiNuc> for AmbiNuc {
    fn as_mut(&mut self) -> &mut Self {
        self
    }
}
/// Iterating an ambiguity code yields the concrete nucleotides it may stand for.
impl IntoIterator for AmbiNuc {
    type Item = Nuc;
    type IntoIter = std::iter::Copied<std::slice::Iter<'static, Nuc>>;

    fn into_iter(self) -> Self::IntoIter {
        Self::iter(self)
    }
}
impl IntoIterator for &AmbiNuc {
type IntoIter = std::iter::Copied<std::slice::Iter<'static, Nuc>>;
type Item = Nuc;
fn into_iter(self) -> Self::IntoIter {
self.iter()
}
}
/// Operations shared by the nucleotide symbol types in this module
/// ([`Nuc`] and [`AmbiNuc`]).
pub trait Nucleotide: Symbol {
    /// The amino-acid symbol type produced when translating codons of this type.
    type Amino: Symbol;

    /// Every value of the implementing type.
    const ALL: &[Self];

    /// Returns the complementary nucleotide.
    #[must_use]
    fn complement(self) -> Self;

    /// Translates one codon (three nucleotides) using `genetic_code`.
    fn translate<G>(genetic_code: &G, codon: [Self; 3]) -> Self::Amino
    where
        G: GeneticCode + ?Sized;

    /// The `_rc` counterpart of [`Nucleotide::translate`].
    ///
    /// NOTE(review): presumably translates the reverse complement of `codon`;
    /// confirm against the `GeneticCode` trait.
    fn translate_rc<G>(genetic_code: &G, codon: [Self; 3]) -> Self::Amino
    where
        G: GeneticCode + ?Sized;
}
/// Concrete nucleotides translate to concrete amino acids.
impl Nucleotide for Nuc {
    type Amino = Amino;

    const ALL: &[Self] = &Nuc::ALL;

    fn complement(self) -> Self {
        // Path resolution prefers the inherent `const fn` over this trait
        // method, so this is a forward, not a recursion.
        Self::complement(self)
    }

    fn translate<G>(genetic_code: &G, codon: [Self; 3]) -> Self::Amino
    where
        G: GeneticCode + ?Sized,
    {
        genetic_code.translate_concrete_codon(codon)
    }

    fn translate_rc<G>(genetic_code: &G, codon: [Self; 3]) -> Self::Amino
    where
        G: GeneticCode + ?Sized,
    {
        genetic_code.translate_rc_concrete_codon(codon)
    }
}
/// Ambiguous nucleotides translate to (possibly ambiguous) amino acids.
impl Nucleotide for AmbiNuc {
    type Amino = AmbiAmino;

    const ALL: &[Self] = &AmbiNuc::ALL;

    fn complement(self) -> Self {
        // Path resolution prefers the inherent `const fn` over this trait
        // method, so this is a forward, not a recursion.
        Self::complement(self)
    }

    fn translate<G>(genetic_code: &G, codon: [Self; 3]) -> Self::Amino
    where
        G: GeneticCode + ?Sized,
    {
        genetic_code.translate_ambiguous_codon(codon)
    }

    fn translate_rc<G>(genetic_code: &G, codon: [Self; 3]) -> Self::Amino
    where
        G: GeneticCode + ?Sized,
    {
        genetic_code.translate_rc_ambiguous_codon(codon)
    }
}
impl Symbol for Nuc {
type Concrete = Nuc;
type Ambiguous = AmbiNuc;
fn to_str(self) -> &'static str {
Self::to_str(self)
}
fn from_ascii(ascii: u8) -> Result<Self, ParseSymbolError> {
Self::from_ascii(ascii)
}
fn to_ascii(self) -> u8 {
Self::to_ascii(self)
}
fn arr<const N: usize>(literal: &[u8; N]) -> [Self; N] {
Self::arr(literal)
}
fn seq<const N: usize>(literal: &[u8; N]) -> Seq<[Self; N]> {
Self::seq(literal)
}
}
impl Symbol for AmbiNuc {
type Concrete = Nuc;
type Ambiguous = AmbiNuc;
fn to_str(self) -> &'static str {
Self::to_str(self)
}
fn from_ascii(ascii: u8) -> Result<Self, ParseSymbolError> {
Self::from_ascii(ascii)
}
fn to_ascii(self) -> u8 {
Self::to_ascii(self)
}
fn arr<const N: usize>(literal: &[u8; N]) -> [Self; N] {
Self::arr(literal)
}
fn seq<const N: usize>(literal: &[u8; N]) -> Seq<[Self; N]> {
Self::seq(literal)
}
}
// Crate-private `Sealed` implementation for `Nuc`.
// NOTE(review): `NAME` and `EXPECTED` are presumably used to build
// `ParseSymbolError` messages — confirm in `crate::error`.
impl crate::symbol::sealed::Sealed for Nuc {
/// Human-readable name of this symbol kind.
const NAME: &str = "nucleotide";
/// Human-readable description of the accepted input letters.
const EXPECTED: &str = "one of A/C/G/T (case-insensitive)";
/// Views a slice of `Nuc` as a slice of `AmbiNuc` by delegating to
/// `crate::casts::nucs_as_ambi`. Only compiled with the `unsafe` feature.
#[cfg(feature = "unsafe")]
fn as_ambi_nucs(nucs: &[Self]) -> &[AmbiNuc] {
crate::casts::nucs_as_ambi(nucs)
}
/// The slice already holds concrete nucleotides, so this always returns `Some`.
#[cfg(feature = "unsafe")]
fn to_nucs(nucs: &[Self]) -> Option<&[Nuc]> {
Some(nucs)
}
/// Mutable counterpart of `to_nucs`; always returns `Some`.
#[cfg(feature = "unsafe")]
fn to_nucs_mut(nucs: &mut [Self]) -> Option<&mut [Nuc]> {
Some(nucs)
}
}
// Crate-private `Sealed` implementation for `AmbiNuc`.
// NOTE(review): `NAME` and `EXPECTED` are presumably used to build
// `ParseSymbolError` messages — confirm in `crate::error`.
impl crate::symbol::sealed::Sealed for AmbiNuc {
/// Human-readable name of this symbol kind.
const NAME: &str = "ambiguous nucleotide";
/// Human-readable description of the accepted input letters.
const EXPECTED: &str = "one of A/C/G/T or B/D/H/K/M/N/R/S/V/W/Y (case-insensitive)";
/// The slice already holds `AmbiNuc` values, so it is returned unchanged.
/// Only compiled with the `unsafe` feature.
#[cfg(feature = "unsafe")]
fn as_ambi_nucs(nucs: &[Self]) -> &[AmbiNuc] {
nucs
}
/// Delegates to `crate::casts::ambi_to_nucs`.
/// NOTE(review): presumably returns `None` when any element is ambiguous —
/// confirm in `crate::casts`.
#[cfg(feature = "unsafe")]
fn to_nucs(nucs: &[Self]) -> Option<&[Nuc]> {
crate::casts::ambi_to_nucs(nucs)
}
/// Mutable counterpart of `to_nucs`; delegates to `crate::casts::ambi_to_nucs_mut`.
#[cfg(feature = "unsafe")]
fn to_nucs_mut(nucs: &mut [Self]) -> Option<&mut [Nuc]> {
crate::casts::ambi_to_nucs_mut(nucs)
}
}
/// Property-style tests that exhaustively check the algebra of [`Nuc`] and
/// [`AmbiNuc`] over every variant.
#[cfg(test)]
mod tests {
    // Brings the slice-level `complement()` used on `Vec<Nuc>` below into scope.
    use crate::DnaSlice;

    use super::*;

    /// Complementing twice returns the original symbol.
    #[test]
    fn complementation_is_involution() {
        for nuc in Nuc::ALL {
            assert_eq!(nuc.complement().complement(), nuc);
        }
        for nuc in AmbiNuc::ALL {
            assert_eq!(nuc.complement().complement(), nuc);
        }
    }

    /// Complementing a code gives the same set as complementing each of its expansions.
    #[test]
    fn complementation_commutes_with_expansions() {
        for nuc in AmbiNuc::ALL {
            let mut expected: Vec<_> = nuc.expansions().to_vec();
            expected.complement();
            // Complementing element-wise can break the ascending order.
            expected.sort();
            assert_eq!(nuc.complement().expansions(), expected);
        }
    }

    /// Lifting a concrete nucleotide yields a code that expands only to itself.
    #[test]
    fn unambiguous_nuc_is_its_only_expansion() {
        for nuc in Nuc::ALL {
            assert_eq!(AmbiNuc::from(nuc).expansions(), [nuc]);
        }
    }

    /// The union of two distinct nucleotides expands to exactly those two.
    #[test]
    fn unambiguous_pair_of_nucs_are_their_only_expansions() {
        for nuc1 in Nuc::ALL {
            for nuc2 in Nuc::ALL {
                // Only ordered pairs, so the expected array is already sorted.
                if nuc1 < nuc2 {
                    assert_eq!((nuc1 | nuc2).expansions(), [nuc1, nuc2]);
                }
            }
        }
    }

    /// Expansions are strictly ascending (hence sorted and duplicate-free).
    #[test]
    fn nuc_expansions_are_sorted_and_unique() {
        for nuc in AmbiNuc::ALL {
            assert!(nuc.iter().zip(nuc.iter().skip(1)).all(|(a, b)| a < b));
        }
    }

    /// `x | x == x` for every code.
    #[test]
    fn ambi_nuc_bitor_is_idempotent() {
        for nuc in AmbiNuc::ALL {
            assert_eq!(nuc | nuc, nuc);
        }
    }

    /// OR-ing a code's expansions back together reproduces the code.
    #[test]
    fn expansions_can_be_recomposed_into_ambi_nucs() {
        for nuc in AmbiNuc::ALL {
            assert_eq!(
                nuc.iter().map(AmbiNuc::from).reduce(|a, b| a | b),
                Some(nuc)
            );
        }
    }

    /// `a | b` expands to the set union of the expansions of `a` and `b`.
    #[test]
    fn ambi_nuc_bitor_is_consistent_with_expansions() {
        for ambi1 in AmbiNuc::ALL {
            for ambi2 in AmbiNuc::ALL {
                let mut expected: Vec<_> = ambi1.iter().chain(ambi2).collect();
                expected.sort();
                expected.dedup();
                assert_eq!((ambi1 | ambi2).expansions(), expected);
            }
        }
    }

    /// `N` absorbs every other code under `|`, on either side.
    #[test]
    fn ambi_nuc_bitor_with_n_is_n() {
        for nuc in AmbiNuc::ALL {
            assert_eq!(nuc | AmbiNuc::N, AmbiNuc::N);
            assert_eq!(AmbiNuc::N | nuc, AmbiNuc::N);
        }
    }

    /// `to_str` followed by `from_str` is the identity.
    #[test]
    fn str_roundtrips() {
        for nuc in Nuc::ALL {
            assert_eq!(Nuc::from_str(nuc.to_str()), Ok(nuc));
        }
        for nuc in AmbiNuc::ALL {
            assert_eq!(AmbiNuc::from_str(nuc.to_str()), Ok(nuc));
        }
    }

    /// `to_ascii` followed by `from_ascii` is the identity.
    #[test]
    fn ascii_roundtrips() {
        for nuc in Nuc::ALL {
            assert_eq!(Nuc::from_ascii(nuc.to_ascii()), Ok(nuc));
        }
        for nuc in AmbiNuc::ALL {
            assert_eq!(AmbiNuc::from_ascii(nuc.to_ascii()), Ok(nuc));
        }
    }

    /// The `ALL` constants are listed in the derived `Ord` order.
    #[test]
    fn all_is_sorted() {
        let mut sorted = Nuc::ALL;
        sorted.sort();
        assert_eq!(Nuc::ALL, sorted);

        let mut sorted = AmbiNuc::ALL;
        sorted.sort();
        assert_eq!(AmbiNuc::ALL, sorted);
    }
}