//! Generalised access to the meta data of identified peptidoforms.
use std::{borrow::Cow, ops::Range};
use serde::{Deserialize, Serialize};
use crate::{
identification::{FastaIdentifier, KnownFileFormat, SpectrumIds},
sequence::{AminoAcid, CompoundPeptidoformIon, Peptidoform, SemiAmbiguous},
system::{Mass, MassOverCharge, Ratio, Time, isize::Charge},
};
/// A common interface for reading the meta data of identified peptidoforms
pub trait MetaData {
    /// The compound peptidoform ion of this identification, if present
    fn compound_peptidoform_ion(&self) -> Option<Cow<'_, CompoundPeptidoformIon>>;

    /// The file format and version of this peptidoform
    fn format(&self) -> KnownFileFormat;

    /// The identifier of this PSM
    fn id(&self) -> String;

    /// The confidence in the entire PSM, expressed as a score between -1 and 1
    fn confidence(&self) -> Option<f64>;

    /// The local confidence: one score between -1 and 1 for each amino acid in the peptide
    fn local_confidence(&self) -> Option<Cow<'_, [f64]>>;

    /// The original confidence
    fn original_confidence(&self) -> Option<f64>;

    /// The original local confidence: one score for each amino acid in the peptide
    fn original_local_confidence(&self) -> Option<&[f64]>;

    /// The charge of the precursor/PSM, if known
    fn charge(&self) -> Option<Charge>;

    /// The fragmentation mode that was used, if known
    fn mode(&self) -> Option<&str>; // TODO: should create an enum or use mzdata formats at some point

    /// The retention time, if known
    fn retention_time(&self) -> Option<Time>;

    /// The scans, per rawfile, that form the basis for this identified peptide
    fn scans(&self) -> SpectrumIds;

    /// The experimentally determined mz
    fn experimental_mz(&self) -> Option<MassOverCharge>;

    /// The experimentally determined mass
    fn experimental_mass(&self) -> Option<Mass>;

    /// The absolute ppm error between the experimental and theoretical precursor mass.
    ///
    /// If multiple theoretical masses are possible the smallest ppm error is returned.
    /// Returns `None` if the experimental mass or the compound peptidoform ion is missing,
    /// or if the compound peptidoform ion yields no formulas.
    fn ppm_error(&self) -> Option<Ratio> {
        let experimental = self.experimental_mass()?;
        let ion = self.compound_peptidoform_ion()?;
        // Bind the formulas to a local so the iterator below borrows from a named value
        let candidates = ion.formulas();
        candidates
            .iter()
            .map(|formula| formula.monoisotopic_mass().ppm(experimental))
            .min_by(|lhs, rhs| lhs.value.total_cmp(&rhs.value))
    }

    /// The absolute mass error between the experimental and theoretical precursor mass.
    ///
    /// If multiple theoretical masses are possible the smallest difference is returned.
    /// Returns `None` if the experimental mass or the compound peptidoform ion is missing,
    /// or if the compound peptidoform ion yields no formulas.
    fn mass_error(&self) -> Option<Mass> {
        let experimental = self.experimental_mass()?;
        let ion = self.compound_peptidoform_ion()?;
        // Bind the formulas to a local so the iterator below borrows from a named value
        let candidates = ion.formulas();
        candidates
            .iter()
            .map(|formula| (experimental - formula.monoisotopic_mass()).abs())
            .min_by(|lhs, rhs| lhs.value.total_cmp(&rhs.value))
    }

    /// The protein names, if this was database matched data
    fn protein_names(&self) -> Option<Cow<'_, [FastaIdentifier<String>]>>;

    /// The protein id, if this was database matched data
    fn protein_id(&self) -> Option<usize>;

    /// The location within the protein, if this was database matched data
    fn protein_location(&self) -> Option<Range<u16>>;

    /// The flanking sequences on the N and C terminal side.
    /// Both reported sequences are written in N to C direction.
    fn flanking_sequences(&self) -> (&FlankingSequence, &FlankingSequence);

    /// The database that was used for matching, optionally with the version of the database
    fn database(&self) -> Option<(&str, Option<&str>)>;

    // Currently disabled, kept for reference:
    // Get the matched fragments, potentially with m/z and intensity
    // #[doc(hidden)]
    // pub fn matched_fragments(
    // &self,
    // ) -> Option<Vec<(Option<MassOverCharge>, Option<f64>, Fragment)>> {
    // // OPair, MaxQuant, PLGS
    // None
    // }
}
/// A flanking sequence, the type used for each of the two sides returned by
/// [`MetaData::flanking_sequences`]. Defaults to [`Self::Unknown`].
// Impossible to get the Sequence option smaller (size of a pointer plus alignment of a pointer so the discriminator is 8 bytes as well),
// which is why the `variant_size_differences` lint is allowed here.
#[allow(variant_size_differences)]
#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)]
pub enum FlankingSequence {
/// If the flanking sequence is unknown (in _de novo_ for example), this is the default
#[default]
Unknown,
/// If this is the terminus
Terminal,
/// If only a single amino acid is known (added to prevent the overhead of needing to create a sequence)
AminoAcid(AminoAcid),
/// If a (small part of the) sequence is known, boxed to keep the enum itself small
Sequence(Box<Peptidoform<SemiAmbiguous>>),
}