use std::marker::PhantomData;
use serde::{Deserialize, Serialize};
use crate::{
chemistry::{AmbiguousLabel, Chemical, MolecularFormula, MultiChemical, SatelliteLabel},
molecular_formula,
quantities::Multi,
sequence::{AminoAcid, IsAminoAcid, SemiAmbiguous, SequencePosition, UnAmbiguous},
};
#[derive(Debug, Deserialize, Ord, PartialOrd, Serialize)]
pub struct CheckedAminoAcid<T> {
aminoacid: AminoAcid,
marker: PhantomData<T>,
}
#[expect(non_upper_case_globals, missing_docs)]
impl CheckedAminoAcid<UnAmbiguous> {
pub const A: Self = Self::Alanine;
pub const C: Self = Self::Cysteine;
pub const D: Self = Self::AsparticAcid;
pub const E: Self = Self::GlutamicAcid;
pub const F: Self = Self::Phenylalanine;
pub const G: Self = Self::Glycine;
pub const H: Self = Self::Histidine;
pub const I: Self = Self::Isoleucine;
pub const J: Self = Self::AmbiguousLeucine;
pub const K: Self = Self::Lysine;
pub const L: Self = Self::Leucine;
pub const M: Self = Self::Methionine;
pub const N: Self = Self::Asparagine;
pub const O: Self = Self::Pyrrolysine;
pub const P: Self = Self::Proline;
pub const Q: Self = Self::Glutamine;
pub const R: Self = Self::Arginine;
pub const S: Self = Self::Serine;
pub const T: Self = Self::Threonine;
pub const U: Self = Self::Selenocysteine;
pub const V: Self = Self::Valine;
pub const W: Self = Self::Tryptophan;
pub const X: Self = Self::Unknown;
pub const Y: Self = Self::Tyrosine;
pub const Ala: Self = Self::Alanine;
pub const Cys: Self = Self::Cysteine;
pub const Asn: Self = Self::Asparagine;
pub const Asp: Self = Self::AsparticAcid;
pub const Glu: Self = Self::GlutamicAcid;
pub const Phe: Self = Self::Phenylalanine;
pub const Gly: Self = Self::Glycine;
pub const His: Self = Self::Histidine;
pub const Ile: Self = Self::Isoleucine;
pub const Xle: Self = Self::AmbiguousLeucine;
pub const Lys: Self = Self::Lysine;
pub const Leu: Self = Self::Leucine;
pub const Met: Self = Self::Methionine;
pub const Pyl: Self = Self::Pyrrolysine;
pub const Pro: Self = Self::Proline;
pub const Gln: Self = Self::Glutamine;
pub const Arg: Self = Self::Arginine;
pub const Ser: Self = Self::Serine;
pub const Thr: Self = Self::Threonine;
pub const Sec: Self = Self::Selenocysteine;
pub const Val: Self = Self::Valine;
pub const Trp: Self = Self::Tryptophan;
pub const Tyr: Self = Self::Tyrosine;
pub const Xaa: Self = Self::Unknown;
pub const Alanine: Self = Self {
aminoacid: AminoAcid::Alanine,
marker: PhantomData,
};
pub const Cysteine: Self = Self {
aminoacid: AminoAcid::Cysteine,
marker: PhantomData,
};
pub const AsparticAcid: Self = Self {
aminoacid: AminoAcid::AsparticAcid,
marker: PhantomData,
};
pub const GlutamicAcid: Self = Self {
aminoacid: AminoAcid::GlutamicAcid,
marker: PhantomData,
};
pub const Phenylalanine: Self = Self {
aminoacid: AminoAcid::Phenylalanine,
marker: PhantomData,
};
pub const Glycine: Self = Self {
aminoacid: AminoAcid::Glycine,
marker: PhantomData,
};
pub const Histidine: Self = Self {
aminoacid: AminoAcid::Histidine,
marker: PhantomData,
};
pub const Isoleucine: Self = Self {
aminoacid: AminoAcid::Isoleucine,
marker: PhantomData,
};
pub const AmbiguousLeucine: Self = Self {
aminoacid: AminoAcid::AmbiguousLeucine,
marker: PhantomData,
};
pub const Lysine: Self = Self {
aminoacid: AminoAcid::Lysine,
marker: PhantomData,
};
pub const Leucine: Self = Self {
aminoacid: AminoAcid::Leucine,
marker: PhantomData,
};
pub const Methionine: Self = Self {
aminoacid: AminoAcid::Methionine,
marker: PhantomData,
};
pub const Asparagine: Self = Self {
aminoacid: AminoAcid::Asparagine,
marker: PhantomData,
};
pub const Pyrrolysine: Self = Self {
aminoacid: AminoAcid::Pyrrolysine,
marker: PhantomData,
};
pub const Proline: Self = Self {
aminoacid: AminoAcid::Proline,
marker: PhantomData,
};
pub const Glutamine: Self = Self {
aminoacid: AminoAcid::Glutamine,
marker: PhantomData,
};
pub const Arginine: Self = Self {
aminoacid: AminoAcid::Arginine,
marker: PhantomData,
};
pub const Serine: Self = Self {
aminoacid: AminoAcid::Serine,
marker: PhantomData,
};
pub const Threonine: Self = Self {
aminoacid: AminoAcid::Threonine,
marker: PhantomData,
};
pub const Selenocysteine: Self = Self {
aminoacid: AminoAcid::Selenocysteine,
marker: PhantomData,
};
pub const Valine: Self = Self {
aminoacid: AminoAcid::Valine,
marker: PhantomData,
};
pub const Tryptophan: Self = Self {
aminoacid: AminoAcid::Tryptophan,
marker: PhantomData,
};
pub const Unknown: Self = Self {
aminoacid: AminoAcid::Unknown,
marker: PhantomData,
};
pub const Tyrosine: Self = Self {
aminoacid: AminoAcid::Tyrosine,
marker: PhantomData,
};
pub const UNIQUE_MASS_AMINO_ACIDS: &'static [Self] = &[
Self::Glycine,
Self::Alanine,
Self::Arginine,
Self::Asparagine,
Self::AsparticAcid,
Self::Cysteine,
Self::Glutamine,
Self::GlutamicAcid,
Self::Histidine,
Self::AmbiguousLeucine,
Self::Lysine,
Self::Methionine,
Self::Phenylalanine,
Self::Proline,
Self::Serine,
Self::Threonine,
Self::Tryptophan,
Self::Tyrosine,
Self::Valine,
Self::Selenocysteine,
Self::Pyrrolysine,
];
pub const CANONICAL_AMINO_ACIDS: &'static [Self] = &[
Self::Glycine,
Self::Alanine,
Self::Arginine,
Self::Asparagine,
Self::AsparticAcid,
Self::Cysteine,
Self::Glutamine,
Self::GlutamicAcid,
Self::Histidine,
Self::Leucine,
Self::Isoleucine,
Self::Lysine,
Self::Methionine,
Self::Phenylalanine,
Self::Proline,
Self::Serine,
Self::Threonine,
Self::Tryptophan,
Self::Tyrosine,
Self::Valine,
];
}
#[expect(non_upper_case_globals, missing_docs)]
impl CheckedAminoAcid<SemiAmbiguous> {
pub const B: Self = Self::AmbiguousAsparagine;
pub const Z: Self = Self::AmbiguousGlutamine;
pub const Asx: Self = Self::AmbiguousAsparagine;
pub const Glx: Self = Self::AmbiguousGlutamine;
pub const AmbiguousAsparagine: Self = Self {
aminoacid: AminoAcid::AmbiguousAsparagine,
marker: PhantomData,
};
pub const AmbiguousGlutamine: Self = Self {
aminoacid: AminoAcid::AmbiguousGlutamine,
marker: PhantomData,
};
}
impl From<AminoAcid> for CheckedAminoAcid<SemiAmbiguous> {
fn from(value: AminoAcid) -> Self {
Self::new(value)
}
}
impl From<&AminoAcid> for CheckedAminoAcid<SemiAmbiguous> {
fn from(value: &AminoAcid) -> Self {
Self::new(*value)
}
}
impl CheckedAminoAcid<SemiAmbiguous> {
pub const fn new(aminoacid: AminoAcid) -> Self {
Self {
aminoacid,
marker: PhantomData,
}
}
}
impl<T> CheckedAminoAcid<T> {
pub(super) const fn mark<M>(self) -> CheckedAminoAcid<M> {
CheckedAminoAcid {
aminoacid: self.aminoacid,
marker: PhantomData,
}
}
pub fn is_unambiguous(self) -> bool {
self.aminoacid != AminoAcid::AmbiguousAsparagine
&& self.aminoacid != AminoAcid::AmbiguousGlutamine
}
pub fn into_unambiguous(self) -> Option<CheckedAminoAcid<UnAmbiguous>> {
self.is_unambiguous().then_some(self.mark())
}
pub fn canonical_identical(self, rhs: Self) -> bool {
self.aminoacid.canonical_identical(rhs.aminoacid)
}
pub const fn aminoacid(self) -> AminoAcid {
self.aminoacid
}
}
impl<T> IsAminoAcid for CheckedAminoAcid<T> {
fn name(&self) -> std::borrow::Cow<'_, str> {
self.aminoacid.name()
}
fn three_letter_code(&self) -> Option<std::borrow::Cow<'_, str>> {
self.aminoacid.three_letter_code()
}
fn one_letter_code(&self) -> Option<char> {
self.aminoacid.one_letter_code()
}
fn pro_forma_definition(&self) -> std::borrow::Cow<'_, str> {
self.aminoacid.pro_forma_definition()
}
fn satellite_ion_fragments(
&self,
sequence_index: SequencePosition,
peptidoform_index: usize,
peptidoform_ion_index: usize,
) -> Option<std::borrow::Cow<'_, [(SatelliteLabel, MolecularFormula)]>> {
self.aminoacid.satellite_ion_fragments(
sequence_index,
peptidoform_index,
peptidoform_ion_index,
)
}
fn side_chain(
&self,
sequence_index: SequencePosition,
peptidoform_index: usize,
peptidoform_ion_index: usize,
) -> std::borrow::Cow<'_, Multi<MolecularFormula>> {
self.aminoacid
.side_chain(sequence_index, peptidoform_index, peptidoform_ion_index)
}
}
impl Chemical for CheckedAminoAcid<UnAmbiguous> {
fn formula_inner(
&self,
_sequence_index: SequencePosition,
_peptidoform_index: usize,
) -> MolecularFormula {
match self.aminoacid {
AminoAcid::Alanine => molecular_formula!(H 5 C 3 O 1 N 1),
AminoAcid::Arginine => molecular_formula!(H 12 C 6 O 1 N 4),
AminoAcid::Asparagine => molecular_formula!(H 6 C 4 O 2 N 2),
AminoAcid::AsparticAcid => molecular_formula!(H 5 C 4 O 3 N 1),
AminoAcid::Cysteine => molecular_formula!(H 5 C 3 O 1 N 1 S 1),
AminoAcid::Glutamine => molecular_formula!(H 8 C 5 O 2 N 2),
AminoAcid::GlutamicAcid => molecular_formula!(H 7 C 5 O 3 N 1),
AminoAcid::Glycine => molecular_formula!(H 3 C 2 O 1 N 1),
AminoAcid::Histidine => molecular_formula!(H 7 C 6 O 1 N 3),
AminoAcid::AmbiguousLeucine | AminoAcid::Isoleucine | AminoAcid::Leucine => {
molecular_formula!(H 11 C 6 O 1 N 1)
}
AminoAcid::Lysine => molecular_formula!(H 12 C 6 O 1 N 2),
AminoAcid::Methionine => molecular_formula!(H 9 C 5 O 1 N 1 S 1),
AminoAcid::Phenylalanine => molecular_formula!(H 9 C 9 O 1 N 1),
AminoAcid::Proline => molecular_formula!(H 7 C 5 O 1 N 1),
AminoAcid::Pyrrolysine => molecular_formula!(H 19 C 11 O 2 N 3),
AminoAcid::Selenocysteine => molecular_formula!(H 5 C 3 O 1 N 1 Se 1),
AminoAcid::Serine => molecular_formula!(H 5 C 3 O 2 N 1),
AminoAcid::Threonine => molecular_formula!(H 7 C 4 O 2 N 1),
AminoAcid::Tryptophan => molecular_formula!(H 10 C 11 O 1 N 2),
AminoAcid::Tyrosine => molecular_formula!(H 9 C 9 O 2 N 1),
AminoAcid::Valine => molecular_formula!(H 9 C 5 O 1 N 1),
AminoAcid::Unknown => molecular_formula!(),
_ => unreachable!(),
}
}
}
impl<T> MultiChemical for CheckedAminoAcid<T> {
fn formulas_inner(
&self,
sequence_index: SequencePosition,
peptidoform_index: usize,
peptidoform_ion_index: usize,
) -> Multi<MolecularFormula> {
self.into_unambiguous().map_or_else(|| {
let SequencePosition::Index(sequence_index) = sequence_index else {
panic!("Not allowed to call amino acid formulas with a terminal sequence index")
};
match self.aminoacid {
AminoAcid::AmbiguousAsparagine => vec![
molecular_formula!(H 6 C 4 O 2 N 2 (AmbiguousLabel::AminoAcid{option: AminoAcid::Asparagine, sequence_index, peptidoform_index, peptidoform_ion_index})),
molecular_formula!(H 5 C 4 O 3 N 1 (AmbiguousLabel::AminoAcid{option: AminoAcid::AsparticAcid, sequence_index, peptidoform_index, peptidoform_ion_index})),
]
.into(),
AminoAcid::AmbiguousGlutamine => vec![
molecular_formula!(H 8 C 5 O 2 N 2 (AmbiguousLabel::AminoAcid{option: AminoAcid::Glutamine, sequence_index, peptidoform_index, peptidoform_ion_index})),
molecular_formula!(H 7 C 5 O 3 N 1 (AmbiguousLabel::AminoAcid{option: AminoAcid::GlutamicAcid, sequence_index, peptidoform_index, peptidoform_ion_index})),
]
.into(),
_ => unreachable!(), }
}, |unambiguous| unambiguous.formula_inner(sequence_index, peptidoform_index).into())
}
}
impl<T> Copy for CheckedAminoAcid<T> {}
impl<T> Clone for CheckedAminoAcid<T> {
fn clone(&self) -> Self {
*self
}
}
impl<A, B> PartialEq<CheckedAminoAcid<B>> for CheckedAminoAcid<A> {
fn eq(&self, other: &CheckedAminoAcid<B>) -> bool {
self.aminoacid == other.aminoacid
}
}
impl<T> std::hash::Hash for CheckedAminoAcid<T> {
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
self.aminoacid.hash(state);
}
}
impl<T> Eq for CheckedAminoAcid<T> {}
impl<T> Default for CheckedAminoAcid<T> {
fn default() -> Self {
Self {
aminoacid: AminoAcid::default(),
marker: PhantomData,
}
}
}
impl<T> std::fmt::Display for CheckedAminoAcid<T> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self.pro_forma_definition())
}
}
impl std::str::FromStr for CheckedAminoAcid<SemiAmbiguous> {
type Err = ();
fn from_str(s: &str) -> Result<Self, Self::Err> {
Self::try_from(s)
}
}
impl TryFrom<&str> for CheckedAminoAcid<SemiAmbiguous> {
type Error = ();
fn try_from(value: &str) -> Result<Self, Self::Error> {
if value.is_ascii() && value.len() == 1 {
let ch = value.chars().next().unwrap();
ch.try_into()
} else {
Err(())
}
}
}
impl TryFrom<char> for CheckedAminoAcid<SemiAmbiguous> {
type Error = ();
fn try_from(value: char) -> Result<Self, Self::Error> {
if value.is_ascii() {
let num = value as u8;
num.try_into()
} else {
Err(())
}
}
}
impl TryFrom<&u8> for CheckedAminoAcid<SemiAmbiguous> {
type Error = ();
fn try_from(value: &u8) -> Result<Self, Self::Error> {
match value {
b'A' | b'a' => Ok(CheckedAminoAcid::<UnAmbiguous>::Alanine.mark()),
b'B' | b'b' => Ok(Self::AmbiguousAsparagine),
b'C' | b'c' => Ok(CheckedAminoAcid::<UnAmbiguous>::Cysteine.mark()),
b'D' | b'd' => Ok(CheckedAminoAcid::<UnAmbiguous>::AsparticAcid.mark()),
b'E' | b'e' => Ok(CheckedAminoAcid::<UnAmbiguous>::GlutamicAcid.mark()),
b'F' | b'f' => Ok(CheckedAminoAcid::<UnAmbiguous>::Phenylalanine.mark()),
b'G' | b'g' => Ok(CheckedAminoAcid::<UnAmbiguous>::Glycine.mark()),
b'H' | b'h' => Ok(CheckedAminoAcid::<UnAmbiguous>::Histidine.mark()),
b'I' | b'i' => Ok(CheckedAminoAcid::<UnAmbiguous>::Isoleucine.mark()),
b'J' | b'j' => Ok(CheckedAminoAcid::<UnAmbiguous>::AmbiguousLeucine.mark()),
b'K' | b'k' => Ok(CheckedAminoAcid::<UnAmbiguous>::Lysine.mark()),
b'L' | b'l' => Ok(CheckedAminoAcid::<UnAmbiguous>::Leucine.mark()),
b'M' | b'm' => Ok(CheckedAminoAcid::<UnAmbiguous>::Methionine.mark()),
b'N' | b'n' => Ok(CheckedAminoAcid::<UnAmbiguous>::Asparagine.mark()),
b'O' | b'o' => Ok(CheckedAminoAcid::<UnAmbiguous>::Pyrrolysine.mark()),
b'P' | b'p' => Ok(CheckedAminoAcid::<UnAmbiguous>::Proline.mark()),
b'Q' | b'q' => Ok(CheckedAminoAcid::<UnAmbiguous>::Glutamine.mark()),
b'R' | b'r' => Ok(CheckedAminoAcid::<UnAmbiguous>::Arginine.mark()),
b'S' | b's' => Ok(CheckedAminoAcid::<UnAmbiguous>::Serine.mark()),
b'T' | b't' => Ok(CheckedAminoAcid::<UnAmbiguous>::Threonine.mark()),
b'U' | b'u' => Ok(CheckedAminoAcid::<UnAmbiguous>::Selenocysteine.mark()),
b'V' | b'v' => Ok(CheckedAminoAcid::<UnAmbiguous>::Valine.mark()),
b'W' | b'w' => Ok(CheckedAminoAcid::<UnAmbiguous>::Tryptophan.mark()),
b'X' | b'x' => Ok(CheckedAminoAcid::<UnAmbiguous>::Unknown.mark()),
b'Y' | b'y' => Ok(CheckedAminoAcid::<UnAmbiguous>::Tyrosine.mark()),
b'Z' | b'z' => Ok(Self::AmbiguousGlutamine),
_ => Err(()),
}
}
}
impl TryFrom<u8> for CheckedAminoAcid<SemiAmbiguous> {
type Error = ();
fn try_from(value: u8) -> Result<Self, Self::Error> {
Self::try_from(&value)
}
}