use crate::chemistry::amino_acid::{amino_acid_composition, amino_acid_masses};
use crate::chemistry::constants::{MASS_CO, MASS_NH3, MASS_PROTON, MASS_WATER};
use crate::chemistry::formulas::calculate_mz;
use crate::chemistry::unimod::{
modification_atomic_composition, unimod_modifications_mass_numerical,
};
use crate::chemistry::utility::{find_unimod_patterns, unimod_sequence_to_tokens};
use crate::data::peptide::{FragmentType, PeptideProductIon, PeptideSequence};
use rayon::prelude::*;
use rayon::ThreadPoolBuilder;
use regex::Regex;
use statrs::distribution::{Binomial, Discrete};
use std::collections::HashMap;
pub fn calculate_peptide_mono_isotopic_mass(peptide_sequence: &PeptideSequence) -> f64 {
let amino_acid_masses = amino_acid_masses();
let modifications_mz_numerical = unimod_modifications_mass_numerical();
let pattern = Regex::new(r"\[UNIMOD:(\d+)]").unwrap();
let sequence = peptide_sequence.sequence.as_str();
let modifications: Vec<u32> = pattern
.find_iter(sequence)
.filter_map(|mat| mat.as_str()[8..mat.as_str().len() - 1].parse().ok())
.collect();
let sequence = pattern.replace_all(sequence, "");
let mut aa_counts = HashMap::new();
for char in sequence.chars() {
*aa_counts.entry(char).or_insert(0) += 1;
}
let mass_sequence: f64 = aa_counts
.iter()
.map(|(aa, &count)| {
amino_acid_masses.get(&aa.to_string()[..]).unwrap_or(&0.0) * count as f64
})
.sum();
let mass_modifications: f64 = modifications
.iter()
.map(|&mod_id| modifications_mz_numerical.get(&mod_id).unwrap_or(&0.0))
.sum();
mass_sequence + mass_modifications + MASS_WATER
}
pub fn calculate_peptide_product_ion_mono_isotopic_mass(sequence: &str, kind: FragmentType) -> f64 {
let (sequence, modifications) = find_unimod_patterns(sequence);
if sequence.is_empty() {
return 0.0;
}
let amino_acid_masses = amino_acid_masses();
let mass_sequence: f64 = sequence
.chars()
.map(|aa| amino_acid_masses.get(&aa.to_string()[..]).unwrap_or(&0.0))
.sum();
let mass_modifications: f64 = modifications.iter().sum();
let mass = mass_sequence + mass_modifications + MASS_WATER;
let mass = match kind {
FragmentType::A => mass - MASS_CO - MASS_WATER,
FragmentType::B => mass - MASS_WATER,
FragmentType::C => mass + MASS_NH3 - MASS_WATER,
FragmentType::X => mass + MASS_CO - 2.0 * MASS_PROTON,
FragmentType::Y => mass,
FragmentType::Z => mass - MASS_NH3,
};
mass
}
pub fn calculate_product_ion_mz(sequence: &str, kind: FragmentType, charge: Option<i32>) -> f64 {
let mass = calculate_peptide_product_ion_mono_isotopic_mass(sequence, kind);
calculate_mz(mass, charge.unwrap_or(1))
}
pub fn calculate_amino_acid_composition(sequence: &str) -> HashMap<String, i32> {
let mut composition = HashMap::new();
for char in sequence.chars() {
*composition.entry(char.to_string()).or_insert(0) += 1;
}
composition
}
pub fn peptide_sequence_to_atomic_composition(
peptide_sequence: &PeptideSequence,
) -> HashMap<&'static str, i32> {
let token_sequence = unimod_sequence_to_tokens(peptide_sequence.sequence.as_str(), false);
let mut collection: HashMap<&'static str, i32> = HashMap::new();
let aa_compositions = amino_acid_composition();
let mod_compositions = modification_atomic_composition();
for token in token_sequence {
if token.len() == 1 {
let char = token.chars().next().unwrap();
if let Some(composition) = aa_compositions.get(&char) {
for (key, value) in composition.iter() {
*collection.entry(key).or_insert(0) += *value;
}
}
} else {
if let Some(composition) = mod_compositions.get(&token) {
for (key, value) in composition.iter() {
*collection.entry(key).or_insert(0) += *value;
}
}
}
}
*collection.entry("H").or_insert(0) += 2; *collection.entry("O").or_insert(0) += 1;
collection
}
pub fn atomic_product_ion_composition(product_ion: &PeptideProductIon) -> Vec<(&str, i32)> {
let mut composition = peptide_sequence_to_atomic_composition(&product_ion.ion.sequence);
match product_ion.kind {
FragmentType::A => {
*composition.entry("H").or_insert(0) -= 2;
*composition.entry("O").or_insert(0) -= 2;
*composition.entry("C").or_insert(0) -= 1;
}
FragmentType::B => {
*composition.entry("H").or_insert(0) -= 2;
*composition.entry("O").or_insert(0) -= 1;
}
FragmentType::C => {
*composition.entry("H").or_insert(0) += 1;
*composition.entry("N").or_insert(0) += 1;
*composition.entry("O").or_insert(0) -= 1;
}
FragmentType::X => {
*composition.entry("C").or_insert(0) += 1; *composition.entry("O").or_insert(0) += 1; *composition.entry("H").or_insert(0) -= 2; }
FragmentType::Y => (),
FragmentType::Z => {
*composition.entry("H").or_insert(0) -= 3;
*composition.entry("N").or_insert(0) -= 1;
}
}
composition.iter().map(|(k, v)| (*k, *v)).collect()
}
pub fn calculate_complementary_fragment_composition(
precursor_composition: &HashMap<&str, i32>,
fragment_composition: &HashMap<&str, i32>,
) -> HashMap<String, i32> {
let mut complementary: HashMap<String, i32> = HashMap::new();
for (element, &prec_count) in precursor_composition.iter() {
let frag_count = fragment_composition.get(element).copied().unwrap_or(0);
let diff = prec_count - frag_count;
if diff != 0 {
complementary.insert(element.to_string(), diff);
}
}
for (element, &frag_count) in fragment_composition.iter() {
if !precursor_composition.contains_key(element) {
complementary.insert(element.to_string(), -frag_count);
}
}
complementary
}
pub fn fragments_to_composition(
product_ions: Vec<PeptideProductIon>,
num_threads: usize,
) -> Vec<Vec<(String, i32)>> {
let thread_pool = ThreadPoolBuilder::new()
.num_threads(num_threads)
.build()
.unwrap();
let result = thread_pool.install(|| {
product_ions
.par_iter()
.map(|ion| atomic_product_ion_composition(ion))
.map(|composition| {
composition
.iter()
.map(|(k, v)| (k.to_string(), *v))
.collect()
})
.collect()
});
result
}
pub fn get_num_protonizable_sites(sequence: &str) -> usize {
let mut sites = 1; for s in sequence.chars() {
match s {
'H' | 'R' | 'K' => sites += 1,
_ => {}
}
}
sites
}
pub fn simulate_charge_state_for_sequence(
sequence: &str,
max_charge: Option<usize>,
charged_probability: Option<f64>,
) -> Vec<f64> {
let charged_prob = charged_probability.unwrap_or(0.8);
let max_charge = max_charge.unwrap_or(4)+1;
let num_protonizable_sites = get_num_protonizable_sites(sequence);
let mut charge_state_probs = vec![0.0; max_charge];
let binom = Binomial::new(charged_prob, num_protonizable_sites as u64).unwrap();
for charge in 0..max_charge {
charge_state_probs[charge] = binom.pmf(charge as u64);
}
charge_state_probs
}
pub fn simulate_charge_states_for_sequences(
sequences: Vec<&str>,
num_threads: usize,
max_charge: Option<usize>,
charged_probability: Option<f64>,
) -> Vec<Vec<f64>> {
let pool = ThreadPoolBuilder::new()
.num_threads(num_threads)
.build()
.unwrap();
pool.install(|| {
sequences
.par_iter()
.map(|sequence| {
simulate_charge_state_for_sequence(sequence, max_charge, charged_probability)
})
.collect()
})
}