use std::collections::HashMap;
use itertools::Itertools;
use mzcv::CVStructure;
use crate::{
chemistry::{Chemical, MassMode, MolecularFormula},
glycan::MonoSaccharide,
ontology::{Ontologies, Ontology},
quantities::{Tolerance, WithinTolerance},
sequence::{
AminoAcid, GnoComposition, Modification, Position, SimpleModification,
SimpleModificationInner,
},
system::{Mass, Ratio, ratio::ppm},
};
use super::Peptidoform;
pub fn modification_search_mass<'a>(
mass: Mass,
tolerance: Tolerance<Mass>,
positions: Option<&'a [(Vec<AminoAcid>, Position)]>,
mass_mode: MassMode,
ontologies: &'a Ontologies,
) -> impl Iterator<Item = SimpleModification> + 'a {
ontologies
.data(&[])
.filter(move |m| tolerance.within(&mass, &m.formula().mass(mass_mode)))
.filter(move |m| {
positions.is_none_or(|positions| {
positions.iter().any(|(aas, p)| {
aas.iter()
.any(|aa| m.is_possible_aa(*aa, *p).any_possible())
})
})
})
}
pub fn modification_search_formula<'a>(
formula: &'a MolecularFormula,
ontologies: &'a Ontologies,
) -> impl Iterator<Item = SimpleModification> + 'a {
ontologies.data(&[]).filter(|m| *formula == m.formula())
}
pub fn modification_search_glycan(
glycan: &[(MonoSaccharide, isize)],
search_topologies: bool,
ontologies: &Ontologies,
) -> impl Iterator<Item = SimpleModification> {
let search = MonoSaccharide::search_composition(glycan);
ontologies.gnome().data().iter_data().filter(move |m| {
if let SimpleModificationInner::Gno {
composition: GnoComposition::Topology(structure),
..
} = &**m
{
search_topologies
&& MonoSaccharide::search_composition(&structure.composition()) == *search
} else if let SimpleModificationInner::Gno {
composition: GnoComposition::Composition(composition),
..
} = &**m
{
MonoSaccharide::search_composition(composition) == *search
} else {
false
}
})
}
#[derive(Clone, Debug)]
pub struct PeptideModificationSearch<'ontologies> {
force_closest: bool,
replace_formulas: bool,
allow_terminal_redefinition: bool,
mass_mode: MassMode,
tolerance: Tolerance<Mass>,
modifications: Vec<SimpleModification>,
selection: Vec<Ontology>,
ontologies: Option<&'ontologies Ontologies>,
cache: HashMap<(Position, Option<AminoAcid>, SimpleModification), Option<SimpleModification>>,
}
impl Default for PeptideModificationSearch<'_> {
fn default() -> Self {
Self {
force_closest: false,
replace_formulas: false,
allow_terminal_redefinition: true,
mass_mode: MassMode::Monoisotopic,
tolerance: Tolerance::Relative(Ratio::new::<ppm>(10.0).into()),
modifications: Vec::new(),
selection: Vec::new(),
ontologies: None,
cache: HashMap::new(),
}
}
}
impl<'ontologies> PeptideModificationSearch<'ontologies> {
pub fn in_modifications(modifications: Vec<SimpleModification>) -> Self {
Self {
modifications,
..Self::default()
}
}
pub fn in_ontologies(selection: Vec<Ontology>, ontologies: &'ontologies Ontologies) -> Self {
Self {
selection,
ontologies: Some(ontologies),
..Self::default()
}
}
#[must_use]
pub fn tolerance(self, tolerance: Tolerance<Mass>) -> Self {
Self {
tolerance,
cache: HashMap::new(),
..self
}
}
#[must_use]
pub fn mass_mode(self, mass_mode: MassMode) -> Self {
Self {
mass_mode,
cache: HashMap::new(),
..self
}
}
#[must_use]
pub fn force_closest(self, force_closest: bool) -> Self {
Self {
force_closest,
cache: HashMap::new(),
..self
}
}
#[must_use]
pub fn replace_formulas(self, replace_formulas: bool) -> Self {
Self {
replace_formulas,
cache: HashMap::new(),
..self
}
}
#[must_use]
pub fn allow_terminal_redefinition(self, allow_terminal_redefinition: bool) -> Self {
Self {
allow_terminal_redefinition,
cache: HashMap::new(),
..self
}
}
#[expect(clippy::similar_names)]
pub fn search<Complexity>(
&mut self,
mut peptide: Peptidoform<Complexity>,
) -> Peptidoform<Complexity> {
fn find_replacement_all_positions(
settings: &mut PeptideModificationSearch,
n_term: bool,
c_term: bool,
aminoacid: Option<AminoAcid>,
in_place: &SimpleModification,
) -> Option<(SimpleModification, Position)> {
if !settings.allow_terminal_redefinition || (!n_term && !c_term) {
settings
.find_replacement(Position::Anywhere, aminoacid, in_place)
.map(|r| (r, Position::Anywhere))
} else if n_term && c_term {
settings
.find_replacement(Position::Anywhere, aminoacid, in_place)
.map(|r| (r, Position::Anywhere))
.or_else(|| {
settings
.find_replacement(Position::AnyNTerm, aminoacid, in_place)
.map(|r| (r, Position::AnyNTerm))
})
.or_else(|| {
settings
.find_replacement(Position::AnyCTerm, aminoacid, in_place)
.map(|r| (r, Position::AnyCTerm))
})
} else if n_term {
settings
.find_replacement(Position::Anywhere, aminoacid, in_place)
.map(|r| (r, Position::Anywhere))
.or_else(|| {
settings
.find_replacement(Position::AnyNTerm, aminoacid, in_place)
.map(|r| (r, Position::AnyNTerm))
})
} else {
settings
.find_replacement(Position::Anywhere, aminoacid, in_place)
.map(|r| (r, Position::Anywhere))
.or_else(|| {
settings
.find_replacement(Position::AnyCTerm, aminoacid, in_place)
.map(|r| (r, Position::AnyCTerm))
})
}
}
fn find_replacement_modification(
settings: &mut PeptideModificationSearch,
position: Position,
aminoacid: Option<AminoAcid>,
in_place: &Modification,
) -> Option<Modification> {
match in_place {
Modification::Simple(simple) => settings
.find_replacement(position, aminoacid, simple)
.map(Modification::Simple),
Modification::CrossLink { .. } | Modification::Ambiguous { .. } => None, }
}
let mut n_term = peptide
.get_n_term()
.iter()
.map(|m| {
find_replacement_modification(
self,
Position::AnyNTerm,
peptide.sequence().first().map(|p| p.aminoacid.aminoacid()),
m,
)
.unwrap_or_else(|| m.clone())
})
.collect_vec();
let mut c_term = peptide
.get_c_term()
.iter()
.map(|m| {
find_replacement_modification(
self,
Position::AnyCTerm,
peptide.sequence().last().map(|p| p.aminoacid.aminoacid()),
m,
)
.unwrap_or_else(|| m.clone())
})
.collect_vec();
let len = peptide.len();
for (index, position) in peptide.sequence_mut().iter_mut().enumerate() {
let is_n_term = index == 0;
let is_c_term = index == len;
let mut remove = None;
for (i, m) in position.modifications.iter_mut().enumerate() {
match m {
Modification::Simple(modification) => {
if let Some((replace, location)) = find_replacement_all_positions(
self,
is_n_term,
is_c_term,
Some(position.aminoacid.aminoacid()),
modification,
) {
if location == Position::AnyNTerm {
n_term.push(Modification::Simple(replace));
remove = Some(i);
} else if location == Position::AnyCTerm {
c_term.push(Modification::Simple(replace));
remove = Some(i);
} else if location == Position::Anywhere {
*m = Modification::Simple(replace);
}
}
}
Modification::Ambiguous { .. } | Modification::CrossLink { .. } => (), }
}
if let Some(remove) = remove.take() {
position.modifications.remove(remove);
}
}
for m in peptide.get_labile_mut_inner() {
if let Some(replace) = self.find_replacement(Position::Anywhere, None, m) {
*m = replace;
}
}
peptide.n_term(n_term).c_term(c_term)
}
fn find_replacement(
&mut self,
position: Position,
aminoacid: Option<AminoAcid>,
in_place: &SimpleModification,
) -> Option<SimpleModification> {
if matches!(&**in_place, SimpleModificationInner::Mass(_, _, _))
|| self.replace_formulas && matches!(&**in_place, SimpleModificationInner::Formula(_))
{
self.cache
.entry((position, aminoacid, in_place.clone()))
.or_insert_with(|| {
Self::find_replacement_uncached(
self.mass_mode,
self.tolerance,
self.replace_formulas,
self.force_closest,
&self.modifications,
&self.selection,
self.ontologies,
position,
aminoacid,
in_place,
)
})
.clone()
} else {
None
}
}
#[expect(clippy::missing_panics_doc, clippy::too_many_arguments)]
fn find_replacement_uncached(
mass_mode: MassMode,
tolerance: Tolerance<Mass>,
replace_formulas: bool,
force_closest: bool,
modifications: &[SimpleModification],
selection: &[Ontology],
ontologies: Option<&Ontologies>,
position: Position,
aminoacid: Option<AminoAcid>,
in_place: &SimpleModification,
) -> Option<SimpleModification> {
let check_matches =
|in_place: &SimpleModification, provided: &SimpleModification| match &**in_place {
SimpleModificationInner::Mass(_, mass, _) => {
tolerance.within(&mass.into_inner(), &provided.formula().mass(mass_mode))
}
SimpleModificationInner::Formula(formula) if replace_formulas => {
*formula == provided.formula()
}
_ => false,
};
let options: Vec<_> = if modifications.is_empty() {
ontologies
.iter()
.flat_map(|o| {
o.data(selection)
.filter(|modification| {
aminoacid.is_none_or(|aa| {
modification.is_possible_aa(aa, position).any_possible()
})
})
.filter(|modification| check_matches(in_place, modification))
})
.collect()
} else {
modifications
.iter()
.filter(|modification| {
aminoacid
.is_none_or(|aa| modification.is_possible_aa(aa, position).any_possible())
})
.filter(|modification| check_matches(in_place, modification))
.cloned()
.collect()
};
match options.len() {
0 => None,
1 => Some(options[0].clone()),
_ if !force_closest => None,
_ => {
let distances: Vec<_> = options
.iter()
.map(|m| {
in_place
.formula()
.mass(mass_mode)
.ppm(m.formula().mass(mass_mode))
.value
})
.collect();
let max: f64 = distances
.iter()
.copied()
.max_by(|a, b| (*a).total_cmp(b))
.unwrap(); let filtered: Vec<_> = distances
.iter()
.copied()
.enumerate()
.filter(|(_, d)| (*d - max).abs() < f64::EPSILON)
.collect();
if filtered.len() == 1 {
Some(options[filtered[0].0].clone())
} else {
None
}
}
}
}
}
#[test]
#[expect(clippy::missing_panics_doc)]
fn test_replacement() {
let mut search = PeptideModificationSearch::in_ontologies(
vec![Ontology::Unimod],
&crate::ontology::STATIC_ONTOLOGIES,
)
.replace_formulas(true);
let (peptide, _) = Peptidoform::pro_forma(
"MSFNELT[79.9663]ESNKKSLM[+15.9949]E",
&crate::ontology::STATIC_ONTOLOGIES,
)
.unwrap();
let (expected, _) = Peptidoform::pro_forma(
"MSFNELT[Phospho]ESNKKSLM[Oxidation]E",
&crate::ontology::STATIC_ONTOLOGIES,
)
.unwrap();
assert_eq!(search.search(peptide), expected);
let (peptide, _) = Peptidoform::pro_forma(
"Q[-17.02655]NKKSLM[+15.9949]E",
&crate::ontology::STATIC_ONTOLOGIES,
)
.unwrap();
let (expected, _) = Peptidoform::pro_forma(
"[Gln->pyro-glu]-QNKKSLM[Oxidation]E",
&crate::ontology::STATIC_ONTOLOGIES,
)
.unwrap();
assert_eq!(search.search(peptide), expected);
let (peptide, _) = Peptidoform::pro_forma(
"M[Formula:O1]KSLM[+15.9949]E",
&crate::ontology::STATIC_ONTOLOGIES,
)
.unwrap();
let (expected, _) = Peptidoform::pro_forma(
"M[Oxidation]KSLM[Oxidation]E",
&crate::ontology::STATIC_ONTOLOGIES,
)
.unwrap();
assert_eq!(search.search(peptide), expected);
}