use std::cmp::Ordering;
use std::fmt::{Display, Formatter};
use std::hash::Hash;
use std::num::NonZeroU32;
use std::ops::{BitOr, BitOrAssign};
use std::str::FromStr;
use crate::error::ParseSymbolError;
use crate::iter::AmbiAminoIter;
use crate::{Seq, Symbol};
/// A single, unambiguous amino acid symbol (or the stop symbol `*`).
///
/// Every variant's discriminant is the ASCII byte of its upper-case
/// one-letter code, so the derived ordering is ASCII order and `Stop` (`*`)
/// sorts before every letter.
#[derive(Clone, Copy, Default, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(u8)]
pub enum Amino {
/// Stop symbol (`*`); this is the `Default` value.
#[default]
Stop = b'*',
/// Alanine.
A = b'A',
/// Cysteine.
C = b'C',
/// Aspartic acid.
D = b'D',
/// Glutamic acid.
E = b'E',
/// Phenylalanine.
F = b'F',
/// Glycine.
G = b'G',
/// Histidine.
H = b'H',
/// Isoleucine.
I = b'I',
/// Lysine.
K = b'K',
/// Leucine.
L = b'L',
/// Methionine.
M = b'M',
/// Asparagine.
N = b'N',
/// Pyrrolysine.
O = b'O',
/// Proline.
P = b'P',
/// Glutamine.
Q = b'Q',
/// Arginine.
R = b'R',
/// Serine.
S = b'S',
/// Threonine.
T = b'T',
/// Selenocysteine.
U = b'U',
/// Valine.
V = b'V',
/// Tryptophan.
W = b'W',
/// Tyrosine.
Y = b'Y',
}
impl Amino {
    /// Every amino acid symbol, listed in ascending (ASCII) order.
    pub const ALL: [Self; 23] = Self::arr(b"*ACDEFGHIKLMNOPQRSTUVWY");

    /// Returns the upper-case one-character code of this amino acid
    /// (`"*"` for [`Amino::Stop`]) as a static string.
    #[must_use]
    pub const fn to_str(self) -> &'static str {
        match self {
            Self::Stop => "*",
            Self::A => "A",
            Self::C => "C",
            Self::D => "D",
            Self::E => "E",
            Self::F => "F",
            Self::G => "G",
            Self::H => "H",
            Self::I => "I",
            Self::K => "K",
            Self::L => "L",
            Self::M => "M",
            Self::N => "N",
            Self::O => "O",
            Self::P => "P",
            Self::Q => "Q",
            Self::R => "R",
            Self::S => "S",
            Self::T => "T",
            Self::U => "U",
            Self::V => "V",
            Self::W => "W",
            Self::Y => "Y",
        }
    }

    /// Parses a single ASCII byte into an amino acid.
    ///
    /// Letters are accepted in either case; `*` maps to [`Amino::Stop`].
    ///
    /// # Errors
    ///
    /// Returns a [`ParseSymbolError`] for any byte that is not one of the
    /// codes listed in [`Amino::ALL`] (this includes `B`, `J`, `X` and `Z`).
    pub const fn from_ascii(ascii: u8) -> Result<Self, ParseSymbolError> {
        // Fold to upper case once instead of listing both cases per arm;
        // `*` and every other non-letter byte pass through unchanged.
        Ok(match ascii.to_ascii_uppercase() {
            b'*' => Self::Stop,
            b'A' => Self::A,
            b'C' => Self::C,
            b'D' => Self::D,
            b'E' => Self::E,
            b'F' => Self::F,
            b'G' => Self::G,
            b'H' => Self::H,
            b'I' => Self::I,
            b'K' => Self::K,
            b'L' => Self::L,
            b'M' => Self::M,
            b'N' => Self::N,
            b'O' => Self::O,
            b'P' => Self::P,
            b'Q' => Self::Q,
            b'R' => Self::R,
            b'S' => Self::S,
            b'T' => Self::T,
            b'U' => Self::U,
            b'V' => Self::V,
            b'W' => Self::W,
            b'Y' => Self::Y,
            _ => return Err(ParseSymbolError::new::<Self>()),
        })
    }

    /// Returns the upper-case ASCII byte of this amino acid.
    #[must_use]
    pub const fn to_ascii(self) -> u8 {
        // The enum is `repr(u8)` and each discriminant *is* the ASCII code,
        // so no lookup through `to_str` is required.
        self as u8
    }

    /// Converts a byte-string literal into an array of amino acids.
    ///
    /// Usable in `const` contexts.
    ///
    /// # Panics
    ///
    /// Panics if any byte is rejected by [`Amino::from_ascii`].
    #[must_use]
    #[track_caller]
    pub const fn arr<const N: usize>(literal: &[u8; N]) -> [Amino; N] {
        // `A` is only a placeholder; every slot is overwritten below.
        let mut aas = [Self::A; N];
        // Index loop because iterators are not usable in `const fn`.
        let mut i = 0;
        while i < N {
            let Ok(aa) = Self::from_ascii(literal[i]) else {
                panic!("Invalid Amino in literal");
            };
            aas[i] = aa;
            i += 1;
        }
        aas
    }

    /// Converts a byte-string literal into a [`Seq`] wrapping an array of
    /// amino acids.
    ///
    /// # Panics
    ///
    /// Panics under the same conditions as [`Amino::arr`].
    #[must_use]
    #[track_caller]
    pub const fn seq<const N: usize>(literal: &[u8; N]) -> Seq<[Amino; N]> {
        Seq(Self::arr(literal))
    }
}
impl Display for Amino {
    /// Writes the one-character code of the amino acid, honouring any
    /// width/alignment flags given in the format string.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        // `Formatter::pad` is what `str`'s own `Display` impl delegates to,
        // so padding behaviour is the same as formatting the string itself.
        let code: &'static str = self.to_str();
        f.pad(code)
    }
}
impl FromStr for Amino {
    type Err = ParseSymbolError;

    /// Parses a string that consists of exactly one byte accepted by
    /// `Amino::from_ascii`; anything shorter or longer is an error.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        // Guard clause: reject the empty string and multi-byte input up front.
        let &[byte] = s.as_bytes() else {
            return Err(ParseSymbolError::new::<Self>());
        };
        Self::from_ascii(byte)
    }
}
impl AsRef<Amino> for Amino {
    /// Identity conversion: hands back the same reference.
    fn as_ref(&self) -> &Self {
        self
    }
}
impl AsMut<Amino> for Amino {
    /// Identity conversion: hands back the same mutable reference.
    fn as_mut(&mut self) -> &mut Self {
        self
    }
}
/// A set of possible [`Amino`] values, stored as a bit mask.
///
/// Bit 0 is always set by the constructors in this file, which keeps the
/// inner value non-zero. The bit at index `(amino as u8) % 32` is set for
/// every amino acid that belongs to the set.
///
/// The derived equality, ordering and hash all operate on the raw bit mask.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct AmbiAmino(NonZeroU32);
// Declares named associated constants inside an `impl` block.
//
// Each entry has the form `NAME = X | Y | ...`, where `X`, `Y`, ... are
// `Amino` variant names. The generated `pub const NAME: Self` has bit 0 set
// plus `Self::bit_mask(Amino::X)` for every listed variant. The surrounding
// `impl` must therefore provide a `const fn bit_mask(Amino) -> u32` and a
// tuple constructor taking a `NonZeroU32`.
//
// An entry may be preceded by any number of doc comments (including none);
// they are forwarded verbatim onto the generated constant.
macro_rules! ambi_amino_consts {
    ( $( $( #[doc = $doc:literal] )* $ambi_amino:ident = $($amino:ident)|+ ),+ $(,)? ) => {
        $(
            $( #[doc = $doc] )*
            pub const $ambi_amino: Self = const {
                // Bit 0 is always set, so the mask can never be zero.
                let bits = 1 | $(Self::bit_mask(Amino::$amino))|+;
                Self(NonZeroU32::new(bits).expect("BUG: somehow 1 | X == 0"))
            };
        )+
    }
}
impl AmbiAmino {
    /// Alias of [`Self::STOP`], spelled like the `Amino::Stop` variant.
    // The mixed-case name is presumably deliberate (it mirrors `Amino::Stop`),
    // hence the lint allowance.
    #[allow(non_upper_case_globals)]
    pub const Stop: Self = Self::STOP;

    // NOTE: each entry carries exactly one doc line, because the macro
    // pattern matches a `#[doc = ...]` attribute in front of every entry.
    ambi_amino_consts!(
        /// Stop symbol (`*`).
        STOP = Stop,
        /// Alanine (`A`).
        A = A,
        /// Aspartic acid or asparagine (`D` or `N`).
        B = D | N,
        /// Cysteine (`C`).
        C = C,
        /// Aspartic acid (`D`).
        D = D,
        /// Glutamic acid (`E`).
        E = E,
        /// Phenylalanine (`F`).
        F = F,
        /// Glycine (`G`).
        G = G,
        /// Histidine (`H`).
        H = H,
        /// Isoleucine (`I`).
        I = I,
        /// Leucine or isoleucine (`L` or `I`).
        J = L | I,
        /// Lysine (`K`).
        K = K,
        /// Leucine (`L`).
        L = L,
        /// Methionine (`M`).
        M = M,
        /// Asparagine (`N`).
        N = N,
        /// Pyrrolysine (`O`).
        O = O,
        /// Proline (`P`).
        P = P,
        /// Glutamine (`Q`).
        Q = Q,
        /// Arginine (`R`).
        R = R,
        /// Serine (`S`).
        S = S,
        /// Threonine (`T`).
        T = T,
        /// Selenocysteine (`U`).
        U = U,
        /// Valine (`V`).
        V = V,
        /// Tryptophan (`W`).
        W = W,
        /// Tyrosine (`Y`).
        Y = Y,
        /// Glutamic acid or glutamine (`E` or `Q`).
        Z = E | Q,
    );

    /// The set containing every symbol in [`Amino::ALL`].
    pub const X: Self = const {
        // Index loop because iterators are not usable in const evaluation.
        let mut i = 0;
        let mut bits = 1;
        while i < Amino::ALL.len() {
            bits |= Self::bit_mask(Amino::ALL[i]);
            i += 1;
        }
        Self(NonZeroU32::new(bits).expect("BUG: somehow 1 | X == 0"))
    };

    /// Returns the raw bit mask backing this set.
    #[must_use]
    pub const fn to_bits(self) -> NonZeroU32 {
        self.0
    }

    /// Returns the one-character code of this set.
    ///
    /// Sets equal to one of the named constants render as that constant's
    /// letter (or `"*"`); every other set, including [`Self::X`], renders
    /// as `"X"`.
    #[must_use]
    pub fn to_str(self) -> &'static str {
        match self {
            Self::STOP => "*",
            Self::A => "A",
            Self::B => "B",
            Self::C => "C",
            Self::D => "D",
            Self::E => "E",
            Self::F => "F",
            Self::G => "G",
            Self::H => "H",
            Self::I => "I",
            Self::J => "J",
            Self::K => "K",
            Self::L => "L",
            Self::M => "M",
            Self::N => "N",
            Self::O => "O",
            Self::P => "P",
            Self::Q => "Q",
            Self::R => "R",
            Self::S => "S",
            Self::T => "T",
            Self::U => "U",
            Self::V => "V",
            Self::W => "W",
            Self::Y => "Y",
            Self::Z => "Z",
            _ => "X",
        }
    }

    /// Parses a single ASCII byte into an ambiguous amino acid.
    ///
    /// Any ASCII letter is accepted in either case, as is `*`.
    ///
    /// # Errors
    ///
    /// Returns a [`ParseSymbolError`] for every other byte.
    pub const fn from_ascii(ascii: u8) -> Result<Self, ParseSymbolError> {
        // Fold to upper case once instead of listing both cases per arm;
        // `*` and every other non-letter byte pass through unchanged.
        Ok(match ascii.to_ascii_uppercase() {
            b'*' => Self::STOP,
            b'A' => Self::A,
            b'B' => Self::B,
            b'C' => Self::C,
            b'D' => Self::D,
            b'E' => Self::E,
            b'F' => Self::F,
            b'G' => Self::G,
            b'H' => Self::H,
            b'I' => Self::I,
            b'J' => Self::J,
            b'K' => Self::K,
            b'L' => Self::L,
            b'M' => Self::M,
            b'N' => Self::N,
            b'O' => Self::O,
            b'P' => Self::P,
            b'Q' => Self::Q,
            b'R' => Self::R,
            b'S' => Self::S,
            b'T' => Self::T,
            b'U' => Self::U,
            b'V' => Self::V,
            b'W' => Self::W,
            b'X' => Self::X,
            b'Y' => Self::Y,
            b'Z' => Self::Z,
            _ => return Err(ParseSymbolError::new::<Self>()),
        })
    }

    /// Returns the ASCII byte of [`Self::to_str`] (so `b'X'` for any set
    /// without a dedicated code).
    #[must_use]
    pub fn to_ascii(self) -> u8 {
        self.to_str().as_bytes()[0]
    }

    /// Converts a byte-string literal into an array of ambiguous amino acids.
    ///
    /// Usable in `const` contexts.
    ///
    /// # Panics
    ///
    /// Panics if any byte is rejected by [`AmbiAmino::from_ascii`].
    #[must_use]
    #[track_caller]
    pub const fn arr<const N: usize>(literal: &[u8; N]) -> [AmbiAmino; N] {
        // `A` is only a placeholder; every slot is overwritten below.
        let mut aas = [Self::A; N];
        let mut i = 0;
        while i < N {
            let Ok(aa) = Self::from_ascii(literal[i]) else {
                panic!("Invalid AmbiAmino in literal");
            };
            aas[i] = aa;
            i += 1;
        }
        aas
    }

    /// Converts a byte-string literal into a [`Seq`] wrapping an array of
    /// ambiguous amino acids.
    ///
    /// # Panics
    ///
    /// Panics under the same conditions as [`AmbiAmino::arr`].
    #[must_use]
    #[track_caller]
    pub const fn seq<const N: usize>(literal: &[u8; N]) -> Seq<[AmbiAmino; N]> {
        Seq(Self::arr(literal))
    }

    /// Returns an iterator built from this set by [`AmbiAminoIter::new`].
    pub fn iter(self) -> AmbiAminoIter {
        AmbiAminoIter::new(self)
    }

    /// `const` counterpart of `AmbiAmino::from(amino)`: the set holding just
    /// `amino`.
    pub(crate) const fn from_amino(amino: Amino) -> AmbiAmino {
        Self(NonZeroU32::new(1 | Self::bit_mask(amino)).expect("x OR 1 is non-zero"))
    }

    /// `const` counterpart of the `|` operator: the union of both sets.
    pub(crate) const fn or(self, rhs: AmbiAmino) -> AmbiAmino {
        Self(NonZeroU32::new(self.0.get() | rhs.0.get()).expect("non-zero OR non-zero is non-zero"))
    }

    /// Index of the bit that represents `amino`: its ASCII code modulo 32.
    ///
    /// Letters land on bits 1..=25 and `*` (42) lands on bit 10, which no
    /// `Amino` letter occupies (`J` would, but it is not a variant).
    pub(crate) const fn bit_offset(amino: Amino) -> u8 {
        (amino as u8) % 32
    }

    /// Single-bit mask for `amino`; see [`Self::bit_offset`].
    pub(crate) const fn bit_mask(amino: Amino) -> u32 {
        1 << Self::bit_offset(amino)
    }
}
impl Default for AmbiAmino {
fn default() -> AmbiAmino {
AmbiAmino::X
}
}
impl BitOr for AmbiAmino {
    type Output = AmbiAmino;

    /// Union of two sets.
    fn bitor(self, rhs: AmbiAmino) -> Self::Output {
        // Delegates to the `const` helper, which ORs the two bit masks.
        self.or(rhs)
    }
}
impl BitOr for &AmbiAmino {
    type Output = AmbiAmino;

    /// Union of two sets, taken by reference.
    fn bitor(self, rhs: &AmbiAmino) -> Self::Output {
        // Both operands are `Copy`; copy them out and reuse the by-value impl.
        let (lhs, rhs) = (*self, *rhs);
        lhs | rhs
    }
}
impl BitOr<Amino> for AmbiAmino {
    type Output = AmbiAmino;

    /// Adds a single amino acid to the set.
    fn bitor(self, rhs: Amino) -> Self::Output {
        let single = AmbiAmino::from_amino(rhs);
        self.or(single)
    }
}
impl BitOr<&Amino> for &AmbiAmino {
    type Output = AmbiAmino;

    /// Adds a single amino acid to the set, both taken by reference.
    fn bitor(self, rhs: &Amino) -> Self::Output {
        let (set, amino) = (*self, *rhs);
        set | amino
    }
}
impl BitOr<AmbiAmino> for Amino {
    type Output = AmbiAmino;

    /// Union of a single amino acid with an existing set.
    fn bitor(self, rhs: AmbiAmino) -> Self::Output {
        let single = AmbiAmino::from_amino(self);
        single.or(rhs)
    }
}
impl BitOr<&AmbiAmino> for &Amino {
    type Output = AmbiAmino;

    /// Union of a single amino acid with an existing set, both by reference.
    fn bitor(self, rhs: &AmbiAmino) -> Self::Output {
        let (amino, set) = (*self, *rhs);
        amino | set
    }
}
impl BitOr for Amino {
    type Output = AmbiAmino;

    /// Combines two concrete amino acids into the set containing both.
    fn bitor(self, rhs: Amino) -> Self::Output {
        let lhs = AmbiAmino::from_amino(self);
        let rhs = AmbiAmino::from_amino(rhs);
        lhs.or(rhs)
    }
}
impl BitOr for &Amino {
    type Output = AmbiAmino;

    /// Combines two concrete amino acids, taken by reference.
    fn bitor(self, rhs: &Amino) -> Self::Output {
        let (lhs, rhs) = (*self, *rhs);
        lhs | rhs
    }
}
impl BitOrAssign for AmbiAmino {
    /// Replaces `self` with the union of `self` and `rhs`.
    fn bitor_assign(&mut self, rhs: AmbiAmino) {
        let merged = (*self).or(rhs);
        *self = merged;
    }
}
impl BitOrAssign<&AmbiAmino> for AmbiAmino {
    /// Replaces `self` with the union of `self` and the referenced set.
    fn bitor_assign(&mut self, rhs: &AmbiAmino) {
        *self = *self | *rhs;
    }
}
impl BitOrAssign<Amino> for AmbiAmino {
fn bitor_assign(&mut self, rhs: Amino) {
*self = *self | rhs;
}
}
impl BitOrAssign<&Amino> for AmbiAmino {
    /// Adds the referenced amino acid to `self`.
    fn bitor_assign(&mut self, rhs: &Amino) {
        *self = *self | *rhs;
    }
}
impl PartialEq<Amino> for AmbiAmino {
    /// `true` when this set is exactly the single-element set for `other`.
    fn eq(&self, other: &Amino) -> bool {
        let widened: AmbiAmino = AmbiAmino::from(*other);
        widened == *self
    }
}
impl PartialEq<AmbiAmino> for Amino {
fn eq(&self, other: &AmbiAmino) -> bool {
other == self
}
}
impl PartialOrd<Amino> for AmbiAmino {
    /// Orders this set against the single-element set for `other`, using
    /// `AmbiAmino`'s own (bit-mask) ordering. Always returns `Some`.
    fn partial_cmp(&self, other: &Amino) -> Option<Ordering> {
        let widened = AmbiAmino::from(*other);
        Some(Ord::cmp(self, &widened))
    }
}
impl PartialOrd<AmbiAmino> for Amino {
fn partial_cmp(&self, other: &AmbiAmino) -> Option<Ordering> {
other.partial_cmp(self).map(Ordering::reverse)
}
}
impl From<Amino> for AmbiAmino {
fn from(amino: Amino) -> Self {
Self(NonZeroU32::MIN | Self::bit_mask(amino))
}
}
impl TryFrom<AmbiAmino> for Amino {
type Error = AmbiAmino;
fn try_from(amino: AmbiAmino) -> Result<Self, Self::Error> {
Ok(match amino {
AmbiAmino::STOP => Self::Stop,
AmbiAmino::A => Amino::A,
AmbiAmino::C => Amino::C,
AmbiAmino::D => Amino::D,
AmbiAmino::E => Amino::E,
AmbiAmino::F => Amino::F,
AmbiAmino::G => Amino::G,
AmbiAmino::H => Amino::H,
AmbiAmino::I => Amino::I,
AmbiAmino::K => Amino::K,
AmbiAmino::L => Amino::L,
AmbiAmino::M => Amino::M,
AmbiAmino::N => Amino::N,
AmbiAmino::O => Amino::O,
AmbiAmino::P => Amino::P,
AmbiAmino::Q => Amino::Q,
AmbiAmino::R => Amino::R,
AmbiAmino::S => Amino::S,
AmbiAmino::T => Amino::T,
AmbiAmino::U => Amino::U,
AmbiAmino::V => Amino::V,
AmbiAmino::W => Amino::W,
AmbiAmino::Y => Amino::Y,
_ => return Err(amino),
})
}
}
#[cfg(any(feature = "proptest", feature = "rand", test))]
impl AmbiAmino {
    /// Valid inputs for [`AmbiAmino::from_bits`]: every non-zero value with
    /// one bit per entry of `Amino::ALL`.
    pub(crate) const BITS_RANGE: std::ops::Range<u32> = std::ops::Range {
        start: 1,
        end: 1 << Amino::ALL.len(),
    };

    /// Expands a compact index into the internal bit-mask layout.
    ///
    /// Input bit 1 is moved to bit 25; every other input bit is shifted up
    /// by one position. Bit 0 of the result is always set.
    pub(crate) fn from_bits(bits: u32) -> AmbiAmino {
        const RELOCATED: u32 = 0b10;
        // Input bit 1 -> internal bit 25 (the slot `b'Y' % 32` maps to).
        let high = (bits & RELOCATED) << 24;
        // Remaining input bits -> internal bits 1 and 3 upwards.
        let low = (bits & !RELOCATED) << 1;
        Self(NonZeroU32::MIN | high | low)
    }
}
impl Display for AmbiAmino {
    /// Writes the one-character code of the set, honouring any
    /// width/alignment flags given in the format string.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        // `Formatter::pad` is what `str`'s own `Display` impl delegates to,
        // so padding behaviour is the same as formatting the string itself.
        let code: &'static str = self.to_str();
        f.pad(code)
    }
}
impl std::fmt::Debug for AmbiAmino {
    /// Writes the one-character code when the set has one (including the
    /// real `X`); otherwise lists the iterated members between square
    /// brackets.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let label = self.to_str();
        // "X" doubles as the fallback label, so a set that prints as "X"
        // without being `Self::X` has no code of its own.
        let lacks_own_code = label == "X" && *self != Self::X;
        if !lacks_own_code {
            return f.write_str(label);
        }
        f.write_str("[")?;
        self.iter()
            .try_for_each(|member| f.write_str(member.to_str()))?;
        f.write_str("]")
    }
}
impl FromStr for AmbiAmino {
    type Err = ParseSymbolError;

    /// Parses a string that consists of exactly one byte accepted by
    /// `AmbiAmino::from_ascii`; anything shorter or longer is an error.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        // Guard clause: reject the empty string and multi-byte input up front.
        let &[byte] = s.as_bytes() else {
            return Err(ParseSymbolError::new::<Self>());
        };
        Self::from_ascii(byte)
    }
}
impl AsRef<AmbiAmino> for AmbiAmino {
    /// Identity conversion: hands back the same reference.
    fn as_ref(&self) -> &Self {
        self
    }
}
impl AsMut<AmbiAmino> for AmbiAmino {
    /// Identity conversion: hands back the same mutable reference.
    fn as_mut(&mut self) -> &mut Self {
        self
    }
}
impl IntoIterator for AmbiAmino {
type IntoIter = AmbiAminoIter;
type Item = Amino;
fn into_iter(self) -> Self::IntoIter {
self.iter()
}
}
impl IntoIterator for &AmbiAmino {
type IntoIter = AmbiAminoIter;
type Item = Amino;
fn into_iter(self) -> Self::IntoIter {
self.iter()
}
}
impl Symbol for Amino {
type Concrete = Amino;
type Ambiguous = AmbiAmino;
fn to_str(self) -> &'static str {
Self::to_str(self)
}
fn from_ascii(ascii: u8) -> Result<Self, ParseSymbolError> {
Self::from_ascii(ascii)
}
fn to_ascii(self) -> u8 {
Self::to_ascii(self)
}
fn arr<const N: usize>(literal: &[u8; N]) -> [Self; N] {
Self::arr(literal)
}
fn seq<const N: usize>(literal: &[u8; N]) -> Seq<[Self; N]> {
Self::seq(literal)
}
}
impl Symbol for AmbiAmino {
type Concrete = Amino;
type Ambiguous = AmbiAmino;
fn to_str(self) -> &'static str {
Self::to_str(self)
}
fn from_ascii(ascii: u8) -> Result<Self, ParseSymbolError> {
Self::from_ascii(ascii)
}
fn to_ascii(self) -> u8 {
Self::to_ascii(self)
}
fn arr<const N: usize>(literal: &[u8; N]) -> [Self; N] {
Self::arr(literal)
}
fn seq<const N: usize>(literal: &[u8; N]) -> Seq<[Self; N]> {
Self::seq(literal)
}
}
// Descriptive text for `Amino`.
// NOTE(review): presumably consumed by `ParseSymbolError` when building its
// message — confirm against `crate::error`.
impl crate::symbol::sealed::Sealed for Amino {
// Human-readable name of the symbol kind.
const NAME: &str = "amino acid";
// Description of the accepted input; matches what `Amino::from_ascii` accepts.
const EXPECTED: &str =
"one of A/C/D/E/F/G/H/I/K/L/M/N/O/P/Q/R/S/T/U/V/W/Y/* (case-insensitive)";
}
// Descriptive text for `AmbiAmino`.
// NOTE(review): presumably consumed by `ParseSymbolError` when building its
// message — confirm against `crate::error`.
impl crate::symbol::sealed::Sealed for AmbiAmino {
// Human-readable name of the symbol kind.
const NAME: &str = "ambiguous amino acid";
// `AmbiAmino::from_ascii` accepts every ASCII letter (either case) and `*`.
const EXPECTED: &str = "a letter or the character *";
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A single-element set iterates to exactly that element.
    #[test]
    fn unambiguous_amino_is_its_only_possibility() {
        for aa in Amino::ALL {
            assert!(AmbiAmino::from(aa).iter().eq([aa]));
        }
    }

    /// A two-element set iterates to exactly those two elements, in order.
    #[test]
    fn unambiguous_pair_of_aminos_are_their_only_possibilities() {
        for aa1 in Amino::ALL {
            for aa2 in Amino::ALL {
                if aa1 < aa2 {
                    assert!((aa1 | aa2).iter().eq([aa1, aa2]));
                }
            }
        }
    }

    /// Every non-empty subset of `Amino::ALL`, built through the public `|`
    /// operator.
    ///
    /// The range deliberately runs up to (and includes) the all-ones index
    /// so that the full set `X` is covered too.
    fn all_ambi_aminos() -> impl Iterator<Item = AmbiAmino> {
        (1u32..(1 << Amino::ALL.len())).filter_map(|amino_i| {
            (0..Amino::ALL.len())
                .filter(|bit| amino_i & (1 << bit) != 0)
                .map(|bit| AmbiAmino::from(Amino::ALL[bit]))
                .reduce(|a, b| a | b)
        })
    }

    #[cfg_attr(debug_assertions, ignore = "slow outside of release mode")]
    #[test]
    fn amino_possibilities_are_sorted_and_unique() {
        for aa in all_ambi_aminos() {
            // Strictly increasing neighbours imply both sorted and unique.
            assert!(aa.iter().zip(aa.iter().skip(1)).all(|(a, b)| a < b));
        }
    }

    #[cfg_attr(debug_assertions, ignore = "slow outside of release mode")]
    #[test]
    fn ambi_amino_bitor_is_idempotent() {
        for aa in all_ambi_aminos() {
            assert_eq!(aa | aa, aa);
        }
    }

    #[cfg_attr(debug_assertions, ignore = "slow outside of release mode")]
    #[test]
    fn amino_possibilities_can_be_recomposed_into_ambi_aminos() {
        for aa in all_ambi_aminos() {
            let possibilities = aa.iter().map(AmbiAmino::from);
            assert_eq!(possibilities.reduce(|a, b| a | b), Some(aa));
        }
    }

    #[test]
    fn smoke_test_ambi_amino_bitor() {
        assert!(
            (AmbiAmino::A | AmbiAmino::B)
                .iter()
                .eq([Amino::A, Amino::D, Amino::N])
        );
    }

    #[test]
    fn amino_bitor_with_x_is_x() {
        for aa in Amino::ALL {
            assert_eq!(aa | AmbiAmino::X, AmbiAmino::X);
            assert_eq!(AmbiAmino::X | aa, AmbiAmino::X);
        }
    }

    #[cfg_attr(debug_assertions, ignore = "slow outside of release mode")]
    #[test]
    fn ambi_amino_bitor_with_x_is_x() {
        for aa in all_ambi_aminos() {
            assert_eq!(aa | AmbiAmino::X, AmbiAmino::X);
            assert_eq!(AmbiAmino::X | aa, AmbiAmino::X);
        }
    }

    #[test]
    fn str_roundtrips() {
        for aa in Amino::ALL {
            assert_eq!(Amino::from_str(aa.to_str()), Ok(aa));
        }
    }

    #[test]
    fn ascii_roundtrips() {
        for aa in Amino::ALL {
            assert_eq!(Amino::from_ascii(aa.to_ascii()), Ok(aa));
        }
    }

    /// Every named ambiguity code survives a byte -> symbol -> byte trip.
    #[test]
    fn ambi_ascii_roundtrips() {
        for &byte in b"*ABCDEFGHIJKLMNOPQRSTUVWXYZ" {
            let aa = AmbiAmino::from_ascii(byte).unwrap();
            assert_eq!(aa.to_ascii(), byte);
            assert_eq!(AmbiAmino::from_ascii(byte.to_ascii_lowercase()), Ok(aa));
        }
    }

    #[test]
    fn all_is_sorted() {
        let mut sorted = Amino::ALL;
        sorted.sort();
        assert_eq!(Amino::ALL, sorted);
    }

    #[test]
    fn sanity_check_from_bits_values() {
        // The largest index in range has every bit set and must map to `X`.
        let all_bits = AmbiAmino::BITS_RANGE.last().unwrap();
        assert_eq!(AmbiAmino::from_bits(all_bits), AmbiAmino::X);

        // Each single input bit must map to a distinct single-element set.
        let mut concrete_ambi_aminos = std::array::from_fn(|i| AmbiAmino::from_bits(1u32 << i));
        let mut expected = Amino::ALL.map(AmbiAmino::from);
        concrete_ambi_aminos.sort();
        expected.sort();
        assert_eq!(concrete_ambi_aminos, expected);
    }
}