use std::sync::LazyLock;
use itertools::Itertools;
use mzcore::{
chemistry::{ChargeRange, NeutralLoss},
prelude::{AminoAcid, MultiChemical, Peptidoform, SequenceElement, SequencePosition},
sequence::{BACKBONE, PeptidePosition},
};
use serde::{Deserialize, Serialize};
use crate::{annotation::model::GlycanModel, fragment::Variant};
/// Settings for immonium ions as stored in [`FragmentationModel::immonium`].
///
/// The first element is the charge range. The second element is a list that pairs a set of
/// amino acids with a set of neutral losses (presumably the neutral losses allowed on the
/// immonium ions of those amino acids; confirm against the fragment generator).
pub type ImmoniumSettings = (ChargeRange, Vec<(Vec<AminoAcid>, Vec<NeutralLoss>)>);
/// A model holding the settings for every fragment type.
///
/// Every field can be replaced with the builder method of the same name on this type, each of
/// which consumes the model and returns the updated model.
#[non_exhaustive]
#[allow(clippy::type_complexity)]
#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)]
pub struct FragmentationModel {
/// Settings for the a ion series
pub a: PrimaryIonSeries,
/// Settings for the b ion series
pub b: PrimaryIonSeries,
/// Settings for the c ion series
pub c: PrimaryIonSeries,
/// Settings for the d satellite ion series
pub d: SatelliteIonSeries,
/// Settings for the v satellite ion series
pub v: SatelliteIonSeries,
/// Settings for the w satellite ion series
pub w: SatelliteIonSeries,
/// Settings for the x ion series
pub x: PrimaryIonSeries,
/// Settings for the y ion series
pub y: PrimaryIonSeries,
/// Settings for the z ion series
pub z: PrimaryIonSeries,
/// Settings for the precursor, in the order used by [`Self::precursor`]: neutral losses, amino
/// acid specific neutral losses, amino acid side chain losses, and the charge range.
pub precursor: (
// Neutral losses
Vec<NeutralLoss>,
// Amino acid specific neutral losses: (amino acids, neutral losses) pairs
Vec<(Vec<AminoAcid>, Vec<NeutralLoss>)>,
// Amino acid side chain losses: (count, optional amino acid selection); this is the same
// shape as the settings consumed by `get_all_sidechain_losses`
(u8, Option<Vec<AminoAcid>>),
// Charge range
ChargeRange,
),
/// Immonium ion settings (presumably `None` turns immonium ions off; confirm with the
/// fragment generator)
pub immonium: Option<ImmoniumSettings>,
/// Flag for modification specific neutral losses (meaning inferred from the name; confirm
/// with the fragment generator)
pub modification_specific_neutral_losses: bool,
/// Optional charge range for modification specific diagnostic ions (presumably `None` turns
/// these ions off; confirm with the fragment generator)
pub modification_specific_diagnostic_ions: Option<ChargeRange>,
/// Settings for glycan fragmentation
pub glycan: GlycanModel,
/// Flag for cross-link cleavage (meaning inferred from the name; confirm with the fragment
/// generator)
pub allow_cross_link_cleavage: bool,
}
/// The settings for a satellite ion series (used for the d, v, and w series of
/// [`FragmentationModel`]).
#[derive(Clone, Debug, Deserialize, Eq, Hash, PartialEq, Serialize)]
pub struct SatelliteIonSeries {
/// The rules determining which residues can give rise to a satellite ion, see
/// [`SatelliteLocation::possible`]
pub location: SatelliteLocation,
/// The neutral losses for this series
pub neutral_losses: Vec<NeutralLoss>,
/// Amino acid specific neutral losses: pairs of a set of amino acids and a set of neutral
/// losses
pub amino_acid_neutral_losses: Vec<(Vec<AminoAcid>, Vec<NeutralLoss>)>,
/// Amino acid side chain losses as `(count, selection)`. This has the same shape as the
/// settings consumed by `get_all_sidechain_losses`, where a count of 0 gives no losses and a
/// selection of `None` allows all amino acids.
pub amino_acid_side_chain_losses: (u8, Option<Vec<AminoAcid>>),
/// The charge range for this series
pub charge_range: ChargeRange,
/// The allowed variants for this series, the default is `vec![0]`
pub allowed_variants: Vec<Variant>,
}
impl SatelliteIonSeries {
    /// Create a series with the default settings, except that the location has no amino acid
    /// specific rules and a base distance of `Some(0)`.
    #[must_use]
    pub fn base() -> Self {
        Self::default().location(SatelliteLocation {
            rules: Vec::new(),
            base: Some(0),
        })
    }

    /// Replace the location, all other settings are kept.
    #[must_use]
    pub fn location(mut self, location: SatelliteLocation) -> Self {
        self.location = location;
        self
    }

    /// Replace the neutral losses, all other settings are kept.
    #[must_use]
    pub fn neutral_losses(mut self, neutral_losses: Vec<NeutralLoss>) -> Self {
        self.neutral_losses = neutral_losses;
        self
    }

    /// Replace the amino acid specific neutral losses, all other settings are kept.
    #[must_use]
    pub fn amino_acid_neutral_losses(
        mut self,
        amino_acid_neutral_losses: Vec<(Vec<AminoAcid>, Vec<NeutralLoss>)>,
    ) -> Self {
        self.amino_acid_neutral_losses = amino_acid_neutral_losses;
        self
    }

    /// Replace the amino acid side chain losses, all other settings are kept.
    #[must_use]
    pub fn amino_acid_side_chain_losses(
        mut self,
        amino_acid_side_chain_losses: (u8, Option<Vec<AminoAcid>>),
    ) -> Self {
        self.amino_acid_side_chain_losses = amino_acid_side_chain_losses;
        self
    }

    /// Replace the charge range, all other settings are kept.
    #[must_use]
    pub fn charge_range(mut self, charge_range: ChargeRange) -> Self {
        self.charge_range = charge_range;
        self
    }

    /// Replace the allowed variants, all other settings are kept.
    #[must_use]
    pub fn variants(mut self, allowed_variants: Vec<i8>) -> Self {
        self.allowed_variants = allowed_variants;
        self
    }
}
impl Default for SatelliteIonSeries {
    /// The default series: default location, no neutral losses of any kind, no side chain
    /// losses, charges `ChargeRange::ONE_TO_PRECURSOR`, and only variant 0.
    fn default() -> Self {
        let side_chain_losses = (0, None);
        let variants = vec![0];
        Self {
            allowed_variants: variants,
            charge_range: ChargeRange::ONE_TO_PRECURSOR,
            amino_acid_side_chain_losses: side_chain_losses,
            amino_acid_neutral_losses: Vec::new(),
            neutral_losses: Vec::new(),
            location: SatelliteLocation::default(),
        }
    }
}
/// The settings for a primary ion series (used for the a, b, c, x, y, and z series of
/// [`FragmentationModel`]).
#[derive(Clone, Debug, Deserialize, Eq, Hash, PartialEq, Serialize)]
pub struct PrimaryIonSeries {
/// The positions at which this series is possible, see [`Location::possible`]
pub location: Location,
/// The neutral losses for this series
pub neutral_losses: Vec<NeutralLoss>,
/// Amino acid specific neutral losses: pairs of a set of amino acids and a set of neutral
/// losses
pub amino_acid_neutral_losses: Vec<(Vec<AminoAcid>, Vec<NeutralLoss>)>,
/// Amino acid side chain losses as `(count, selection)`. This has the same shape as the
/// settings consumed by `get_all_sidechain_losses`, where a count of 0 gives no losses and a
/// selection of `None` allows all amino acids.
pub amino_acid_side_chain_losses: (u8, Option<Vec<AminoAcid>>),
/// The charge range for this series
pub charge_range: ChargeRange,
/// The allowed variants for this series, the default is `vec![0]`
pub allowed_variants: Vec<Variant>,
}
impl PrimaryIonSeries {
    /// Create a series with the default settings, except that the location is
    /// [`Location::None`], for which [`Location::possible`] is always `false`.
    #[must_use]
    pub fn none() -> Self {
        Self {
            location: Location::None,
            ..Default::default()
        }
    }

    /// Replace the location, all other settings are kept.
    #[must_use]
    pub fn location(self, location: Location) -> Self {
        Self { location, ..self }
    }

    /// Replace the neutral losses, all other settings are kept.
    #[must_use]
    pub fn neutral_losses(self, neutral_losses: Vec<NeutralLoss>) -> Self {
        Self {
            neutral_losses,
            ..self
        }
    }

    /// Replace the amino acid specific neutral losses, all other settings are kept.
    #[must_use]
    pub fn amino_acid_neutral_losses(
        self,
        amino_acid_neutral_losses: Vec<(Vec<AminoAcid>, Vec<NeutralLoss>)>,
    ) -> Self {
        Self {
            amino_acid_neutral_losses,
            ..self
        }
    }

    /// Replace the amino acid side chain losses, all other settings are kept.
    #[must_use]
    pub fn amino_acid_side_chain_losses(
        self,
        amino_acid_side_chain_losses: (u8, Option<Vec<AminoAcid>>),
    ) -> Self {
        Self {
            amino_acid_side_chain_losses,
            ..self
        }
    }

    /// Replace the charge range, all other settings are kept.
    #[must_use]
    pub fn charge_range(self, charge_range: ChargeRange) -> Self {
        Self {
            charge_range,
            ..self
        }
    }

    /// Replace the allowed variants, all other settings are kept.
    // NOTE(review): the parameter is spelled `Vec<i8>` while the field is `Vec<Variant>`; this
    // only type checks while `Variant` is an alias for `i8`. Consider using `Vec<Variant>` here.
    #[must_use]
    pub fn variants(self, allowed_variants: Vec<i8>) -> Self {
        Self {
            allowed_variants,
            ..self
        }
    }
}
impl Default for PrimaryIonSeries {
    /// The default series: location [`Location::All`], no neutral losses of any kind, no side
    /// chain losses, charges `ChargeRange::ONE_TO_PRECURSOR`, and only variant 0.
    fn default() -> Self {
        let side_chain_losses = (0, None);
        let variants = vec![0];
        Self {
            allowed_variants: variants,
            charge_range: ChargeRange::ONE_TO_PRECURSOR,
            amino_acid_side_chain_losses: side_chain_losses,
            amino_acid_neutral_losses: Vec::new(),
            neutral_losses: Vec::new(),
            location: Location::All,
        }
    }
}
impl FragmentationModel {
    /// Replace the settings for the a series.
    #[must_use]
    pub fn a(mut self, a: PrimaryIonSeries) -> Self {
        self.a = a;
        self
    }

    /// Replace the settings for the b series.
    #[must_use]
    pub fn b(mut self, b: PrimaryIonSeries) -> Self {
        self.b = b;
        self
    }

    /// Replace the settings for the c series.
    #[must_use]
    pub fn c(mut self, c: PrimaryIonSeries) -> Self {
        self.c = c;
        self
    }

    /// Replace the settings for the d satellite series.
    #[must_use]
    pub fn d(mut self, d: SatelliteIonSeries) -> Self {
        self.d = d;
        self
    }

    /// Replace the settings for the v satellite series.
    #[must_use]
    pub fn v(mut self, v: SatelliteIonSeries) -> Self {
        self.v = v;
        self
    }

    /// Replace the settings for the w satellite series.
    #[must_use]
    pub fn w(mut self, w: SatelliteIonSeries) -> Self {
        self.w = w;
        self
    }

    /// Replace the settings for the x series.
    #[must_use]
    pub fn x(mut self, x: PrimaryIonSeries) -> Self {
        self.x = x;
        self
    }

    /// Replace the settings for the y series.
    #[must_use]
    pub fn y(mut self, y: PrimaryIonSeries) -> Self {
        self.y = y;
        self
    }

    /// Replace the settings for the z series.
    #[must_use]
    pub fn z(mut self, z: PrimaryIonSeries) -> Self {
        self.z = z;
        self
    }

    /// Replace the glycan model.
    #[must_use]
    pub fn glycan(mut self, glycan: GlycanModel) -> Self {
        self.glycan = glycan;
        self
    }

    /// Replace the precursor settings. The arguments are stored, in the order given, as the
    /// four elements of the `precursor` tuple.
    #[must_use]
    pub fn precursor(
        mut self,
        neutral_loss: Vec<NeutralLoss>,
        amino_acid_specific_neutral_losses: Vec<(Vec<AminoAcid>, Vec<NeutralLoss>)>,
        amino_acid_side_chain_losses: (u8, Option<Vec<AminoAcid>>),
        charges: ChargeRange,
    ) -> Self {
        self.precursor = (
            neutral_loss,
            amino_acid_specific_neutral_losses,
            amino_acid_side_chain_losses,
            charges,
        );
        self
    }

    /// Replace the immonium settings.
    #[must_use]
    pub fn immonium(mut self, state: Option<ImmoniumSettings>) -> Self {
        self.immonium = state;
        self
    }

    /// Replace the modification specific neutral losses flag.
    #[must_use]
    pub fn modification_specific_neutral_losses(mut self, state: bool) -> Self {
        self.modification_specific_neutral_losses = state;
        self
    }

    /// Replace the modification specific diagnostic ions setting.
    #[must_use]
    pub fn modification_specific_diagnostic_ions(mut self, state: Option<ChargeRange>) -> Self {
        self.modification_specific_diagnostic_ions = state;
        self
    }

    /// Replace the cross-link cleavage flag.
    #[must_use]
    pub fn allow_cross_link_cleavage(mut self, state: bool) -> Self {
        self.allow_cross_link_cleavage = state;
        self
    }
}
/// A rule selecting the positions at which a primary ion series is possible. The exact
/// conditions below are the ones checked by `Location::possible`, where `index` is the sequence
/// index of the position and `length` its sequence length.
#[derive(
Clone, Copy, Eq, PartialEq, Ord, PartialOrd, Hash, Default, Debug, Serialize, Deserialize,
)]
pub enum Location {
/// Possible when `index >= n`
SkipN(usize),
/// Possible when `index >= n` (first value) and `length - index > c` (second value)
SkipNC(usize, usize),
/// Possible when `skip <= index < skip + take`
TakeN {
/// The number of positions to skip first
skip: usize,
/// The number of positions that are possible after the skipped ones
take: usize,
},
/// Possible when `length - index > n`
SkipC(usize),
/// Possible when `length - index <= n`
TakeC(usize),
/// Possible whenever the series number of the position differs from the sequence length
All,
/// Never possible, this is the default
#[default]
None,
}
impl Location {
    /// Determine if a fragment is possible at the given position according to this rule.
    ///
    /// # Panics
    /// If `position.sequence_index` is not a `SequencePosition::Index`. The subtractions
    /// `sequence_length - sequence_index` also assume the index does not exceed the length.
    pub const fn possible(self, position: PeptidePosition) -> bool {
        let SequencePosition::Index(sequence_index) = position.sequence_index else {
            panic!("Not allowed to call possible with a terminal PeptidePosition")
        };
        match self {
            Self::SkipN(n) => sequence_index >= n,
            Self::SkipNC(n, c) => {
                sequence_index >= n && position.sequence_length - sequence_index > c
            }
            // Written as a subtraction instead of `sequence_index < skip + take` so that a very
            // large `take` (e.g. `usize::MAX`) cannot overflow. The subtraction itself cannot
            // underflow because `&&` only evaluates it once `sequence_index >= skip` holds.
            Self::TakeN { skip, take } => sequence_index >= skip && sequence_index - skip < take,
            Self::SkipC(n) => position.sequence_length - sequence_index > n,
            Self::TakeC(n) => position.sequence_length - sequence_index <= n,
            Self::All => position.series_number != position.sequence_length,
            Self::None => false,
        }
    }
}
/// The rules selecting which residues can give rise to a satellite ion, as evaluated by
/// `SatelliteLocation::possible`.
#[derive(Clone, Debug, Default, Deserialize, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize)]
pub struct SatelliteLocation {
/// Amino acid specific rules as `(amino acids, maximal distance)`. For any residue the first
/// rule that lists its amino acid decides, later rules and `base` are then ignored.
pub rules: Vec<(Vec<AminoAcid>, u8)>,
/// The maximal distance for residues whose amino acid is not listed in any rule, with `None`
/// such residues are never allowed.
pub base: Option<u8>,
}
impl SatelliteLocation {
    /// List the residues around `position` that are allowed by these rules, each together with
    /// the distance that was calculated for it.
    ///
    /// A window of at most `max + 1` residues is inspected, where `max` is the largest distance
    /// found in `base` and `rules`. If `base` is `None` and there are no rules the result is
    /// empty.
    /// * If `c_terminal` is `true` the window starts at the sequence index of `position` and
    ///   runs towards the end of the peptidoform, never including its very last residue. The
    ///   distance of a residue is its offset from the start of the window, starting at 0.
    /// * Otherwise the window ends just before the sequence index of `position` and the
    ///   distance of a residue is `max + 1` minus its offset from the start of the window.
    ///
    /// A residue is allowed if its distance is at most the distance of the first rule listing
    /// its amino acid, or, if no rule lists it, at most `base`.
    ///
    /// # Panics
    /// If `position.sequence_index` is not a `SequencePosition::Index`.
    pub fn possible<Complexity>(
        &self,
        position: PeptidePosition,
        peptidoform: &Peptidoform<Complexity>,
        c_terminal: bool,
    ) -> Vec<(AminoAcid, u8)> {
        let SequencePosition::Index(sequence_index) = position.sequence_index else {
            panic!("Not allowed to call possible with a terminal PeptidePosition")
        };
        let largest_rule = self.rules.iter().map(|rule| rule.1).max();
        // The window size is calculated as `usize`, doing the `+ 1` on the `u8` would overflow
        // (panic in debug builds, wrap to 0 in release builds) for a maximal distance of 255.
        let max_distance = match (self.base, largest_rule) {
            (Some(b), Some(r)) => usize::from(b.max(r)) + 1,
            (Some(b), None) => usize::from(b) + 1,
            (None, Some(r)) => usize::from(r) + 1,
            (None, None) => return Vec::new(),
        };
        let range = if c_terminal {
            sequence_index
                ..sequence_index
                    .saturating_add(max_distance)
                    .min(peptidoform.len() - 1)
        } else {
            sequence_index.saturating_sub(max_distance)..sequence_index
        };
        let mut output = Vec::new();
        for (index, seq) in peptidoform[range].iter().enumerate() {
            // NOTE(review): for the N-terminal direction `index` counts from the start of the
            // window, which is clipped at the start of the peptidoform, while the distance is
            // taken relative to the unclipped window size. Close to the start of the
            // peptidoform this reports larger distances than further along the sequence; confirm that
            // this is intended.
            let raw_distance = if c_terminal {
                index
            } else {
                max_distance - index
            };
            // Distances that do not fit in a `u8` can never satisfy any rule
            let Ok(distance) = u8::try_from(raw_distance) else {
                continue;
            };
            let amino_acid = seq.aminoacid.aminoacid();
            // The first rule listing this amino acid decides, `base` is only the fallback
            let allowed = self
                .rules
                .iter()
                .find(|rule| rule.0.contains(&amino_acid))
                .map_or_else(
                    || self.base.is_some_and(|b| distance <= b),
                    |rule| distance <= rule.1,
                );
            if allowed {
                output.push((amino_acid, distance));
            }
        }
        output
    }
}
/// Get all combinations of amino acid side chain losses for a stretch of sequence.
///
/// `settings.0` is the maximal number of losses combined into one set, with 0 no losses are
/// generated at all. `settings.1` is the selection of amino acids that can lose their side
/// chain, with `None` every amino acid in the slice is a candidate.
///
/// The candidates are the unique amino acids of the slice that pass the selection. Each formula
/// of a candidate gives one side chain loss (the formula minus the backbone) and empty losses
/// are dropped. The result contains every combination of 1 up to and including `settings.0`
/// distinct losses.
pub(crate) fn get_all_sidechain_losses<Complexity>(
    slice: &[SequenceElement<Complexity>],
    settings: &(u8, Option<Vec<AminoAcid>>),
) -> Vec<Vec<NeutralLoss>> {
    let (max_losses, selection) = settings;
    if *max_losses == 0 {
        return Vec::new();
    }

    let candidates = slice
        .iter()
        .map(|element| element.aminoacid.aminoacid())
        .filter(|amino_acid| {
            selection
                .as_ref()
                .is_none_or(|selected| selected.contains(amino_acid))
        })
        .unique();

    let mut options: Vec<NeutralLoss> = Vec::new();
    for amino_acid in candidates {
        for formula in amino_acid.formulas().iter() {
            let loss =
                NeutralLoss::SideChainLoss(formula - LazyLock::force(&BACKBONE), amino_acid);
            if !loss.is_empty() {
                options.push(loss);
            }
        }
    }

    // All combinations of exactly `count` losses, for every count from 1 up to the maximum
    let mut combined = Vec::new();
    for count in 1..=*max_losses {
        for combination in options.iter().combinations(usize::from(count)) {
            combined.push(combination.into_iter().cloned().collect_vec());
        }
    }
    combined
}
/// Check the number of side chain loss sets generated for the peptide `FGGGTKLELKR` under
/// different settings.
#[test]
#[allow(clippy::missing_panics_doc)]
fn side_chain_losses() {
    let peptide = Peptidoform::pro_forma("FGGGTKLELKR", &mzcore::ontology::STATIC_ONTOLOGIES)
        .unwrap()
        .0
        .into_simple_linear()
        .unwrap();
    // A maximum of zero losses never generates anything
    assert_eq!(
        0,
        get_all_sidechain_losses(peptide.sequence(), &(0, None)).len()
    );
    assert_eq!(
        1,
        get_all_sidechain_losses(
            peptide.sequence(),
            &(1, Some(vec![AminoAcid::Phenylalanine]))
        )
        .len()
    );
    // Glycine does not give any loss
    assert_eq!(
        0,
        get_all_sidechain_losses(peptide.sequence(), &(1, Some(vec![AminoAcid::Glycine]))).len()
    );
    // Leucine occurs twice in the sequence but is only counted once
    assert_eq!(
        1,
        get_all_sidechain_losses(peptide.sequence(), &(1, Some(vec![AminoAcid::Leucine]))).len()
    );
    // Seven distinct amino acids, of which glycine does not give a loss
    assert_eq!(
        6,
        get_all_sidechain_losses(peptide.sequence(), &(1, None)).len()
    );
    // Two single losses plus the one combination of both
    assert_eq!(
        3,
        get_all_sidechain_losses(
            peptide.sequence(),
            &(2, Some(vec![AminoAcid::Phenylalanine, AminoAcid::Leucine]))
        )
        .len()
    );
}