use std::{
fmt::Write,
hash::Hash,
num::NonZeroU16,
ops::{Add, AddAssign, Mul, Neg, Sub, SubAssign},
};
use ordered_float::OrderedFloat;
use serde::{Deserialize, Serialize};
use thin_vec::ThinVec;
use crate::{
chemistry::Element,
glycan::{GlycanPosition, MonoSaccharide},
quantities::Multi,
sequence::{AminoAcid, CrossLinkName, SequencePosition},
};
#[allow(clippy::unsafe_derive_deserialize)]
#[derive(Clone, Debug, Default, Deserialize, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize)]
pub struct MolecularFormula {
pub(in super::super) elements: ThinVec<(Element, Option<NonZeroU16>, i32)>,
pub(in super::super) additional_mass: OrderedFloat<f64>,
#[serde(default)]
pub(in super::super) labels: ThinVec<AmbiguousLabel>,
}
#[derive(Clone, Debug, Deserialize, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize)]
pub enum AmbiguousLabel {
AminoAcid {
option: AminoAcid,
sequence_index: usize,
peptidoform_index: usize,
peptidoform_ion_index: usize,
},
Modification {
id: usize,
sequence_index: SequencePosition,
peptidoform_index: usize,
peptidoform_ion_index: usize,
},
ChargeCarrier(MolecularFormula),
CrossLinkBound(CrossLinkName),
CrossLinkBroken(CrossLinkName, MolecularFormula),
GlycanFragment(Vec<GlycanPosition>),
GlycanFragmentComposition(Vec<(MonoSaccharide, isize)>),
}
pub trait Chemical {
#[allow(dead_code)]
fn formula(&self) -> MolecularFormula {
self.formula_inner(SequencePosition::default(), 0)
}
fn formula_inner(
&self,
sequence_index: SequencePosition,
peptidoform_index: usize,
) -> MolecularFormula;
}
impl<T: Chemical> Chemical for &[T] {
fn formula_inner(
&self,
sequence_index: SequencePosition,
peptidoform_index: usize,
) -> MolecularFormula {
self.iter()
.map(|f| f.formula_inner(sequence_index, peptidoform_index))
.sum()
}
}
impl<T: Chemical> Chemical for &Vec<T> {
fn formula_inner(
&self,
sequence_index: SequencePosition,
peptidoform_index: usize,
) -> MolecularFormula {
self.iter()
.map(|f| f.formula_inner(sequence_index, peptidoform_index))
.sum()
}
}
pub trait MultiChemical {
fn formulas(&self) -> Multi<MolecularFormula> {
self.formulas_inner(SequencePosition::default(), 0, 0)
}
fn formulas_inner(
&self,
sequence_index: SequencePosition,
peptidoform_index: usize,
peptidoform_ion_index: usize,
) -> Multi<MolecularFormula>;
#[allow(dead_code)]
fn charge(&self) -> Option<crate::system::isize::Charge> {
self.formulas()
.first()
.map(MolecularFormula::charge)
.filter(|c| c.value != 0)
}
fn single_formula(&self) -> Option<MolecularFormula> {
let formulas = self.formulas();
(formulas.len() == 1).then_some(formulas.to_vec().pop().unwrap())
}
#[allow(dead_code)]
fn single_formula_inner(
&self,
sequence_index: SequencePosition,
peptidoform_index: usize,
peptidoform_ion_index: usize,
) -> Option<MolecularFormula> {
let formulas =
self.formulas_inner(sequence_index, peptidoform_index, peptidoform_ion_index);
(formulas.len() == 1).then_some(formulas.to_vec().pop().unwrap())
}
}
impl MolecularFormula {
pub fn new(
elements: &[(Element, Option<NonZeroU16>, i32)],
labels: &[AmbiguousLabel],
) -> Option<Self> {
if elements.iter().any(|e| !e.0.is_valid(e.1)) {
None
} else {
let result = Self {
elements: elements.into(),
additional_mass: 0.0.into(),
labels: labels.into(),
};
Some(result.simplify())
}
}
#[must_use]
fn simplify(mut self) -> Self {
self.elements.retain(|el| el.2 != 0);
self.elements.sort_by(|a, b| {
if a.0 == b.0 {
a.1.cmp(&b.1)
} else {
a.0.cmp(&b.0)
}
});
let mut max = self.elements.len().saturating_sub(1);
let mut index = 0;
while index < max {
let this = self.elements[index];
let next = self.elements[index + 1];
if this.0 == next.0 && this.1 == next.1 {
self.elements[index].2 += next.2;
self.elements.remove(index + 1);
max = max.saturating_sub(1);
} else {
index += 1;
}
}
self.elements
.retain(|el: &(Element, Option<std::num::NonZero<u16>>, i32)| el.2 != 0);
self
}
pub fn with_additional_mass(additional_mass: f64) -> Self {
Self {
elements: ThinVec::new(),
additional_mass: OrderedFloat(additional_mass),
labels: ThinVec::new(),
}
}
pub(crate) fn with_labels(mut self, labels: &[AmbiguousLabel]) -> Self {
self.labels.extend_from_slice(labels);
self
}
pub(crate) fn with_label(mut self, label: AmbiguousLabel) -> Self {
self.labels.push(label);
self
}
pub fn labels(&self) -> &[AmbiguousLabel] {
&self.labels
}
#[must_use]
pub fn add(&mut self, element: (Element, Option<NonZeroU16>, i32)) -> bool {
if element.0.is_valid(element.1) {
let mut index = 0;
let mut done = false;
let (el, i, n) = element;
while !done {
let base = self.elements.get(index).copied();
if let Some((re, ri, _)) = base {
if el > re || (el == re && i > ri) {
index += 1;
} else if el == re && i == ri {
if let Some(n) = self.elements[index].2.checked_add(n) {
self.elements[index].2 = n;
} else {
return false;
}
done = true;
} else {
self.elements.insert(index, (el, i, n));
done = true;
}
} else {
self.elements.push((el, i, n));
done = true;
}
}
true
} else {
false
}
}
pub fn add_mass(&mut self, mass: OrderedFloat<f64>) {
self.additional_mass += mass;
}
pub fn elements(&self) -> &[(Element, Option<NonZeroU16>, i32)] {
&self.elements
}
pub const fn additional_mass(&self) -> OrderedFloat<f64> {
self.additional_mass
}
#[must_use]
pub fn with_global_isotope_modifications(
&self,
substitutions: &[(Element, Option<NonZeroU16>)],
) -> Option<Self> {
if substitutions.iter().all(|e| e.0.is_valid(e.1)) {
let mut new_elements = self.elements.clone();
for item in &mut new_elements {
for (substitute_element, substitute_species) in substitutions {
if item.0 == *substitute_element {
item.1 = *substitute_species;
}
}
}
let result = Self {
elements: new_elements,
additional_mass: self.additional_mass,
labels: self.labels.clone(),
};
Some(result.simplify())
} else {
None
}
}
pub fn charge(&self) -> crate::system::isize::Charge {
-self
.elements
.iter()
.find(|el| el.0 == Element::Electron)
.map_or_else(crate::system::isize::Charge::default, |el| {
crate::system::isize::Charge::new::<crate::system::charge::e>(el.2 as isize)
})
}
pub fn is_empty(&self) -> bool {
self.elements.is_empty() && self.additional_mass == 0.0
}
#[allow(dead_code)]
pub(in super::super) fn hill_notation_generic(
&self,
f: impl Fn(&(Element, Option<NonZeroU16>, i32), &mut String),
) -> String {
let mut buffer = String::new();
if let Some(carbon) = self
.elements
.iter()
.find(|e| e.0 == Element::C && e.1.is_none())
{
if carbon.2 != 0 {
f(carbon, &mut buffer);
}
if let Some(hydrogen) = self
.elements
.iter()
.find(|e| e.0 == Element::H && e.1.is_none())
&& hydrogen.2 != 0
{
f(hydrogen, &mut buffer);
}
for element in self.elements.iter().filter(|e| {
!((e.0 == Element::H || e.0 == Element::C || e.0 == Element::Electron)
&& e.1.is_none())
}) {
if element.2 != 0 {
f(element, &mut buffer);
}
}
} else {
for element in &self.elements {
if element.2 != 0 && element.0 != Element::Electron {
f(element, &mut buffer);
}
}
}
if self.additional_mass != 0.0 {
write!(&mut buffer, "{:+}", self.additional_mass).unwrap();
}
if self.charge().value != 0 {
write!(&mut buffer, ":z{:+}", self.charge().value).unwrap();
}
buffer
}
}
impl Neg for &MolecularFormula {
type Output = MolecularFormula;
fn neg(self) -> Self::Output {
let mut res = self.clone();
for element in &mut res.elements {
element.2 = -element.2;
}
res
}
}
impl Neg for MolecularFormula {
type Output = Self;
fn neg(mut self) -> Self::Output {
for element in &mut self.elements {
element.2 = -element.2;
}
self
}
}
impl Add<&MolecularFormula> for &MolecularFormula {
type Output = MolecularFormula;
fn add(self, rhs: &MolecularFormula) -> Self::Output {
let mut result = (*self).clone();
result.labels.extend_from_slice(&rhs.labels);
let mut index_result = 0;
let mut index_rhs = 0;
result.additional_mass += rhs.additional_mass;
while index_rhs < rhs.elements.len() {
let (el, i, n) = rhs.elements[index_rhs];
if index_result < result.elements.len() {
let (re, ri, _) = result.elements[index_result];
if el > re || (el == re && i > ri) {
index_result += 1;
} else if el == re && i == ri {
result.elements[index_result].2 += n;
index_rhs += 1;
} else {
result.elements.insert(index_result, (el, i, n));
index_rhs += 1;
}
} else {
result.elements.push((el, i, n));
index_rhs += 1;
}
}
result.elements.retain(|el| el.2 != 0);
result
}
}
impl Sub<&MolecularFormula> for &MolecularFormula {
type Output = MolecularFormula;
fn sub(self, rhs: &MolecularFormula) -> Self::Output {
let mut result = (*self).clone();
result.labels.extend_from_slice(&rhs.labels);
let mut index_result = 0;
let mut index_rhs = 0;
result.additional_mass -= rhs.additional_mass;
while index_rhs < rhs.elements.len() {
let (el, i, n) = rhs.elements[index_rhs];
if index_result < result.elements.len() {
let (re, ri, _) = result.elements[index_result];
if el > re || (el == re && i > ri) {
index_result += 1;
} else if el == re && i == ri {
result.elements[index_result].2 -= n;
index_rhs += 1;
} else {
result.elements.insert(index_result, (el, i, -n));
index_rhs += 1;
}
} else {
result.elements.push((el, i, -n));
index_rhs += 1;
}
}
result.elements.retain(|el| el.2 != 0);
result
}
}
impl Mul<&i32> for &MolecularFormula {
type Output = MolecularFormula;
fn mul(self, rhs: &i32) -> Self::Output {
self.checked_mul_i32(*rhs)
.expect("Overflow in multiplying MolecularFormula")
}
}
impl Mul<&u16> for &MolecularFormula {
type Output = MolecularFormula;
fn mul(self, rhs: &u16) -> Self::Output {
self.checked_mul_u16(*rhs)
.expect("Overflow in multiplying MolecularFormula")
}
}
impl MolecularFormula {
pub fn checked_mul_i32(&self, rhs: i32) -> Option<Self> {
Some(Self {
additional_mass: self.additional_mass * f64::from(rhs),
elements: self
.elements
.iter()
.copied()
.map(|part| Some((part.0, part.1, part.2.checked_mul(rhs)?)))
.collect::<Option<_>>()?,
labels: self.labels.clone(),
})
}
pub fn checked_mul_u16(&self, rhs: u16) -> Option<Self> {
Some(Self {
additional_mass: self.additional_mass * f64::from(rhs),
elements: self
.elements
.iter()
.copied()
.map(|part| Some((part.0, part.1, part.2.checked_mul(i32::from(rhs))?)))
.collect::<Option<_>>()?,
labels: self.labels.clone(),
})
}
}
impl Mul<&i8> for &MolecularFormula {
type Output = MolecularFormula;
fn mul(self, rhs: &i8) -> Self::Output {
MolecularFormula {
additional_mass: self.additional_mass * f64::from(*rhs),
elements: self
.elements
.iter()
.copied()
.map(|part| (part.0, part.1, part.2 * i32::from(*rhs)))
.collect(),
labels: self.labels.clone(),
}
}
}
impl_binop_ref_cases!(impl Add, add for MolecularFormula, MolecularFormula, MolecularFormula);
impl_binop_ref_cases!(impl Sub, sub for MolecularFormula, MolecularFormula, MolecularFormula);
impl_binop_ref_cases!(impl Mul, mul for MolecularFormula, i32, MolecularFormula);
impl_binop_ref_cases!(impl Mul, mul for MolecularFormula, u16, MolecularFormula);
impl_binop_ref_cases!(impl Mul, mul for MolecularFormula, i8, MolecularFormula);
impl AddAssign<&Self> for MolecularFormula {
fn add_assign(&mut self, rhs: &Self) {
let mut index_self = 0;
let mut index_rhs = 0;
self.additional_mass += rhs.additional_mass;
self.labels.extend_from_slice(&rhs.labels);
while index_rhs < rhs.elements.len() {
let (el, i, n) = rhs.elements[index_rhs];
if index_self < self.elements.len() {
let (re, ri, _) = self.elements[index_self];
if el > re || (el == re && i > ri) {
index_self += 1;
} else if el == re && i == ri {
self.elements[index_self].2 += n;
index_rhs += 1;
} else {
self.elements.insert(index_self, (el, i, n));
index_rhs += 1;
}
} else {
self.elements.push((el, i, n));
index_rhs += 1;
}
}
}
}
impl SubAssign<&Self> for MolecularFormula {
fn sub_assign(&mut self, rhs: &Self) {
self.labels.extend_from_slice(&rhs.labels);
let mut index_result = 0;
let mut index_rhs = 0;
self.additional_mass -= rhs.additional_mass;
while index_rhs < rhs.elements.len() {
let (el, i, n) = rhs.elements[index_rhs];
if index_result < self.elements.len() {
let (re, ri, _) = self.elements[index_result];
if el > re || (el == re && i > ri) {
index_result += 1;
} else if el == re && i == ri {
self.elements[index_result].2 -= n;
index_rhs += 1;
} else {
self.elements.insert(index_result, (el, i, -n));
index_rhs += 1;
}
} else {
self.elements.push((el, i, -n));
index_rhs += 1;
}
}
self.elements.retain(|el| el.2 != 0);
}
}
impl AddAssign<Self> for MolecularFormula {
fn add_assign(&mut self, rhs: Self) {
*self += &rhs;
}
}
impl SubAssign<Self> for MolecularFormula {
fn sub_assign(&mut self, rhs: Self) {
*self -= &rhs;
}
}
impl std::iter::Sum<Self> for MolecularFormula {
fn sum<I: Iterator<Item = Self>>(iter: I) -> Self {
let mut res = Self::default();
iter.for_each(|v| res += v);
res
}
}
#[macro_export]
macro_rules! molecular_formula {
($($tail:tt)*) => {
$crate::formula_internal!([$($tail)*] -> [])
};
}
#[doc(hidden)]
#[macro_export]
macro_rules! formula_internal {
([$e:ident $n:literal $($tail:tt)*] -> [$($output:tt)*]) => {
$crate::formula_internal!([$($tail)*] -> [$($output)*($crate::chemistry::Element::$e, None, $n),])
};
([[$i:literal $e:ident $n:literal] $($tail:tt)*] -> [$($output:tt)*]) => {
$crate::formula_internal!([$($tail)*] -> [$($output)*($crate::chemistry::Element::$e, Some(std::num::NonZeroU16::new($i).unwrap()), $n),])
};
([$e:ident $n:expr] -> [$($output:tt)*]) =>{
$crate::formula_internal!([] -> [$($output)*($crate::chemistry::Element::$e, None, $n),])
};
([[$i:literal $e:ident] $n:expr] -> [$($output:tt)*]) =>{
$crate::formula_internal!([] -> [$($output)*($crate::chemistry::Element::$e, Some(std::num::NonZeroU16::new($i).unwrap()), $n),])
};
([] -> [$($output:tt)*]) =>{
$crate::chemistry::MolecularFormula::new(&[$($output)*], &[]).unwrap()
};
([($($l:expr),*)] -> [$($output:tt)*]) =>{
$crate::chemistry::MolecularFormula::new(&[$($output)*], &[$($l),*]).unwrap()
};
}