mzdeisotope_map/
traits.rs

1use std::collections::HashMap;
2
3use identity_hash::BuildIdentityHasher;
4use itertools::Itertools;
5
6use chemical_elements::isotopic_pattern::TheoreticalIsotopicPattern;
7
8use mzdeisotope::{
9    charge::{ChargeIterator, ChargeListIter, ChargeRange},
10    isotopic_model::{isotopic_shift, IsotopicPatternParams},
11    scorer::{ScoreInterpretation, ScoreType},
12};
13use mzpeaks::{
14    coordinate::CoordinateRange,
15    feature::TimeInterval,
16    prelude::*,
17    MZ,
18};
19use thiserror::Error;
20use tracing::debug;
21
22use crate::{
23    dependency_graph::{
24        DependenceCluster, FeatureDependenceGraph, FitKey, FitRef, SubgraphSolverMethod,
25    },
26    fmap::{IndexedFeature, IndexedFeatureMap},
27    FeatureSetFit,
28};
29
30pub(crate) type FeatureType<Y> = IndexedFeature<Y>;
31pub(crate) type FeatureMapType<Y> = IndexedFeatureMap<Y>;
32
33/// An error that might occur during deconvolution
34#[derive(Debug, Clone, PartialEq, Error)]
35pub enum DeconvolutionError {
36    #[error("Failed to resolve a deconvolution solution")]
37    FailedToResolveSolution,
38    #[error("Failed to resolve a fit reference {0:?}")]
39    FailedToResolveFit(FitRef),
40}
41
42#[derive(Debug, Clone, Copy)]
43pub struct FeatureSearchParams {
44    pub truncate_after: f64,
45    pub ignore_below: f64,
46    pub max_missed_peaks: usize,
47    pub threshold_scale: f32,
48    pub detection_threshold: f32,
49}
50
51impl Default for FeatureSearchParams {
52    fn default() -> Self {
53        Self {
54            truncate_after: 0.95,
55            ignore_below: 0.05,
56            max_missed_peaks: 1,
57            threshold_scale: 0.3,
58            detection_threshold: 0.1,
59        }
60    }
61}
62
63impl FeatureSearchParams {
64    pub fn new(
65        truncate_after: f64,
66        ignore_below: f64,
67        max_missed_peaks: usize,
68        threshold_scale: f32,
69        detection_threshold: f32,
70    ) -> Self {
71        Self {
72            truncate_after,
73            ignore_below,
74            max_missed_peaks,
75            threshold_scale,
76            detection_threshold,
77        }
78    }
79
80    pub fn as_isotopic_params(&self) -> IsotopicPatternParams {
81        IsotopicPatternParams {
82            incremental_truncation: None,
83            truncate_after: self.truncate_after,
84            ignore_below: self.ignore_below,
85            ..Default::default()
86        }
87    }
88}
89
90pub fn quick_charge_feature<C: FeatureLike<MZ, Y>, Y, const N: usize>(
91    features: &[C],
92    position: usize,
93    charge_range: ChargeRange,
94) -> ChargeListIter {
95    let (min_charge, max_charge) = charge_range;
96    let mut charges = [false; N];
97    if N > 0 {
98        charges[0] = true;
99    }
100    let feature = &features[position];
101    let mut result_size = 0usize;
102    let min_intensity = feature.intensity() / 4.0;
103    let query_time = feature.as_range();
104    let query_mz = feature.mz();
105
106    for other in features
107        .iter()
108        .skip(position + 1)
109        .filter(|f| f.as_range().overlaps(&query_time))
110    {
111        let diff = other.mz() - query_mz;
112        if diff > 1.1 {
113            break;
114        }
115        if other.intensity() < min_intensity {
116            continue;
117        }
118        let raw_charge = 1.0 / diff;
119        let remain = raw_charge - raw_charge.floor();
120        if 0.2 < remain && remain < 0.8 {
121            continue;
122        }
123        let charge = (raw_charge + 0.5) as i32;
124        if charge < min_charge || charge > max_charge {
125            continue;
126        }
127        let idx_z = (charge - 1) as usize;
128        let hit = &mut charges[idx_z];
129        if !*hit {
130            result_size += 1;
131            *hit = true;
132        }
133    }
134
135    let mut result = Vec::with_capacity(result_size);
136    charges.iter().enumerate().for_each(|(j, hit)| {
137        let z = (j + 1) as i32;
138        if *hit && accept_charge(z, &charge_range) {
139            result.push(z)
140        }
141    });
142    result.into()
143}
144
145#[inline(always)]
146fn accept_charge(z: i32, charge_range: &ChargeRange) -> bool {
147    let z = z.abs();
148    charge_range.0.abs() <= z && z <= charge_range.1.abs()
149}
150
151/// An wrapper around [`quick_charge`] which dispatches to an appropriate staticly compiled
152/// variant with minimal stack allocation.
153pub fn quick_charge_feature_w<C: FeatureLike<MZ, Y>, Y>(
154    peaks: &[C],
155    position: usize,
156    charge_range: ChargeRange,
157) -> ChargeListIter {
158    macro_rules! match_i {
159        ($($i:literal, )*) => {
160            match charge_range.1 {
161                $($i => quick_charge_feature::<C, Y, $i>(peaks, position, charge_range),)*
162                i if i > 16 && i < 33 => quick_charge_feature::<C, Y, 32>(peaks, position, charge_range),
163                i if i > 32 && i < 65 => quick_charge_feature::<C, Y, 64>(peaks, position, charge_range),
164                i if i > 64 && i < 129 => quick_charge_feature::<C, Y, 128>(peaks, position, charge_range),
165                _ => quick_charge_feature::<C, Y, 256>(peaks, position, charge_range),
166            }
167        };
168    }
169    match_i!(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,)
170}
171
172pub trait FeatureMapMatch<Y> {
173    fn feature_map(&self) -> &FeatureMapType<Y>;
174    fn feature_map_mut(&mut self) -> &mut FeatureMapType<Y>;
175
176    fn find_all_features(
177        &self,
178        mz: f64,
179        error_tolerance: Tolerance,
180    ) -> Vec<(usize, &FeatureType<Y>)> {
181        let indices = self.feature_map().all_indices_for(mz, error_tolerance);
182        indices
183            .into_iter()
184            .map(|i| (i, &self.feature_map()[i]))
185            .collect_vec()
186    }
187
188    fn find_features(
189        &self,
190        mz: f64,
191        error_tolerance: Tolerance,
192        interval: &Option<CoordinateRange<Y>>,
193    ) -> Option<Vec<(usize, &FeatureType<Y>)>> {
194        let f = self.find_all_features(mz, error_tolerance);
195        if f.is_empty() {
196            None
197        } else if let Some(interval) = interval {
198            let search_width = interval.end.unwrap() - interval.start.unwrap();
199            if search_width == 0.0 {
200                return None;
201            }
202            let f: Vec<(usize, &FeatureType<Y>)> = f
203                .into_iter()
204                .filter(|(_, f)| {
205                    let t = f.as_range();
206                    if t.overlaps(&interval) {
207                        ((t.end.unwrap() - t.start.unwrap()) / search_width) >= 0.05
208                    } else {
209                        false
210                    }
211                })
212                .collect();
213            if f.is_empty() {
214                None
215            } else {
216                Some(f)
217            }
218        } else {
219            Some(f.into_iter().collect_vec())
220        }
221    }
222
223    fn match_theoretical_isotopic_distribution(
224        &self,
225        theoretical_distribution: &TheoreticalIsotopicPattern,
226        error_tolerance: Tolerance,
227        interval: &Option<CoordinateRange<Y>>,
228    ) -> Vec<Option<IndexedIsotopicFitFeatureSet<'_, Y>>> {
229        theoretical_distribution
230            .iter()
231            .map(|p| self.find_features(p.mz, error_tolerance, interval))
232            .collect()
233    }
234}
235
236pub type IndexedIsotopicFitFeatureSet<'a, Y> = Vec<(usize, &'a FeatureType<Y>)>;
237
238pub trait FeatureIsotopicFitter<Y>: FeatureMapMatch<Y> {
239    fn fit_theoretical_distribution(
240        &mut self,
241        feature: usize,
242        error_tolerance: Tolerance,
243        charge: i32,
244        left_search: i8,
245        right_search: i8,
246        search_params: &FeatureSearchParams,
247    ) -> Vec<FeatureSetFit> {
248        let (base_mz, time_range) = {
249            let feature = self.feature_map().get_item(feature);
250            let base_mz = feature.mz();
251            let time_range = Some(feature.as_range());
252            (base_mz, time_range)
253        };
254        let mut all_fits = Vec::new();
255        for offset in -left_search..=right_search {
256            let shift = isotopic_shift(charge) * (offset as f64);
257            let mz = base_mz + shift;
258            all_fits.extend(self.fit_feature_set(
259                mz,
260                error_tolerance,
261                charge,
262                search_params,
263                &time_range,
264            ));
265        }
266        all_fits
267    }
268
269    fn make_isotopic_pattern(
270        &mut self,
271        mz: f64,
272        charge: i32,
273        search_params: &FeatureSearchParams,
274    ) -> TheoreticalIsotopicPattern;
275
276    fn fit_feature_set(
277        &mut self,
278        mz: f64,
279        error_tolerance: Tolerance,
280        charge: i32,
281        search_params: &FeatureSearchParams,
282        feature: &Option<CoordinateRange<Y>>,
283    ) -> Vec<FeatureSetFit> {
284        let base_tid = self.make_isotopic_pattern(mz, charge, search_params);
285
286        self.fit_theoretical_distribution_on_features(
287            mz,
288            error_tolerance,
289            charge,
290            base_tid,
291            search_params.max_missed_peaks,
292            search_params.threshold_scale,
293            feature,
294        )
295    }
296
297    #[allow(clippy::too_many_arguments)]
298    fn fit_theoretical_distribution_on_features(
299        &self,
300        mz: f64,
301        error_tolerance: Tolerance,
302        charge: i32,
303        base_tid: TheoreticalIsotopicPattern,
304        max_missed_peaks: usize,
305        threshold_scale: f32,
306        feature: &Option<CoordinateRange<Y>>,
307    ) -> Vec<FeatureSetFit>;
308}
309
310pub trait GraphFeatureDeconvolution<Y>: FeatureIsotopicFitter<Y> {
311    fn score_interpretation(&self) -> ScoreInterpretation;
312
313    fn add_fit_to_graph(&mut self, fit: FeatureSetFit) {
314        self.dependency_graph_mut().add_fit(fit);
315    }
316
317    fn prefer_multiply_charged(&self) -> bool;
318
319    fn skip_feature(&self, feature: &FeatureType<Y>) -> bool;
320
321    fn dependency_graph_mut(&mut self) -> &mut FeatureDependenceGraph;
322
323    fn populate_graph(
324        &mut self,
325        error_tolerance: Tolerance,
326        charge_range: ChargeRange,
327        left_search: i8,
328        right_search: i8,
329        search_params: &FeatureSearchParams,
330    ) -> usize {
331        let n = self.feature_map().len();
332        if n == 0 {
333            return 0;
334        }
335        (0..n)
336            .rev()
337            .map(move |i| {
338                if i % 5000 == 0 {
339                    debug!(
340                        "Processing feature {}/{n} ({:0.2}%)",
341                        (n - i),
342                        (n - i) as f32 / n as f32 * 100.0
343                    )
344                }
345
346                // If we are only visiting a subset of features, if the feature's key
347                // is not on the list, skip the rest of this work.
348                // if let Some(to_visit) = keys_to_visit {
349                //     if !to_visit.contains(self.feature_map()[i].key.as_ref().unwrap()) {
350                //         return 0;
351                //     }
352                // }
353
354                if self.feature_map()[i].charges().is_none() {
355                    let charge_range_of =
356                        quick_charge_feature_w(self.feature_map().as_slice(), i, charge_range);
357                    let f = &mut self.feature_map_mut()[i];
358                    *f.charges_mut() = Some(Vec::from(charge_range_of.clone()).into_boxed_slice());
359                }
360
361                let charge_range_of: ChargeListIter =
362                    self.feature_map()[i].charges().unwrap().to_vec().into();
363
364                self.explore_local(
365                    i,
366                    error_tolerance,
367                    charge_range_of,
368                    left_search,
369                    right_search,
370                    search_params,
371                )
372            })
373            .sum()
374    }
375
376    fn explore_local<Z: ChargeIterator>(
377        &mut self,
378        feature: usize,
379        error_tolerance: Tolerance,
380        charge_range: Z,
381        left_search: i8,
382        right_search: i8,
383        search_params: &FeatureSearchParams,
384    ) -> usize {
385        if self.skip_feature(self.feature_map().get_item(feature)) {
386            0
387        } else {
388            self.collect_all_fits(
389                feature,
390                error_tolerance,
391                charge_range,
392                left_search,
393                right_search,
394                search_params,
395            )
396        }
397    }
398
399    fn collect_all_fits<Z: ChargeIterator>(
400        &mut self,
401        feature: usize,
402        error_tolerance: Tolerance,
403        charge_range: Z,
404        left_search: i8,
405        right_search: i8,
406        search_params: &FeatureSearchParams,
407    ) -> usize {
408        let (mut best_fit_score, is_maximizing) = match self.score_interpretation() {
409            ScoreInterpretation::HigherIsBetter => (0.0, true),
410            ScoreInterpretation::LowerIsBetter => (ScoreType::INFINITY, false),
411        };
412        let mut best_fit_charge = 0;
413        let prefer_multiply_charged = self.prefer_multiply_charged();
414        let mut holdout = None;
415        let mut counter = 0;
416
417        for charge in charge_range {
418            let current_fits = self.fit_theoretical_distribution(
419                feature,
420                error_tolerance,
421                charge,
422                left_search,
423                right_search,
424                search_params,
425            );
426
427            let is_multiply_charged = charge.abs() > 1;
428            if is_maximizing {
429                for fit in current_fits.iter() {
430                    if fit.score > best_fit_score {
431                        if is_multiply_charged && !fit.has_multiple_real_features() {
432                            continue;
433                        }
434                        best_fit_score = fit.score;
435                        best_fit_charge = charge.abs();
436                    }
437                }
438            } else {
439                for fit in current_fits.iter() {
440                    if fit.score < best_fit_score {
441                        if is_multiply_charged && !fit.has_multiple_real_features() {
442                            continue;
443                        }
444                        best_fit_score = fit.score;
445                        best_fit_charge = charge.abs();
446                    }
447                }
448            }
449            if !is_multiply_charged && prefer_multiply_charged {
450                holdout = Some(current_fits);
451            } else {
452                for fit in current_fits {
453                    if is_multiply_charged && !fit.has_multiple_real_features() {
454                        continue;
455                    }
456                    counter += 1;
457                    if counter % 100 == 0 && counter > 0 {
458                        debug!("Added {counter}th solution for feature {feature} to graph");
459                    }
460                    self.add_fit_to_graph(fit);
461                }
462            }
463        }
464        if holdout.is_some() && best_fit_charge == 1 {
465            for fit in holdout.unwrap() {
466                counter += 1;
467                self.add_fit_to_graph(fit);
468            }
469        }
470        counter
471    }
472
473    fn solve_subgraph_top(
474        &mut self,
475        cluster: DependenceCluster,
476        fits: Vec<(FitRef, FeatureSetFit)>,
477        fit_accumulator: &mut Vec<FeatureSetFit>,
478    ) -> Result<(), DeconvolutionError> {
479        let mut fits: HashMap<FitKey, FeatureSetFit, BuildIdentityHasher<FitKey>> =
480            fits.into_iter().map(|(k, v)| (k.key, v)).collect();
481        for best_fit_key in cluster.iter().take(1) {
482            if let Some(fit) = fits.remove(&best_fit_key.key) {
483                fit_accumulator.push(fit);
484            } else {
485                return Err(DeconvolutionError::FailedToResolveFit(*best_fit_key));
486            }
487        }
488        Ok(())
489    }
490
491    fn select_best_disjoint_subgraphs(
492        &mut self,
493        fit_accumulator: &mut Vec<FeatureSetFit>,
494    ) -> Result<(), DeconvolutionError> {
495        let solutions = self
496            .dependency_graph_mut()
497            .solutions(SubgraphSolverMethod::Greedy);
498        debug!("{} distinct solution clusters", solutions.len());
499        let res: Result<(), DeconvolutionError> = solutions
500            .into_iter()
501            .try_for_each(|(cluster, fits)| {
502                self.solve_subgraph_top(cluster, fits, fit_accumulator)?;
503                Ok(())
504            });
505        res
506    }
507
508    fn graph_step_deconvolve(
509        &mut self,
510        error_tolerance: Tolerance,
511        charge_range: ChargeRange,
512        left_search: i8,
513        right_search: i8,
514        search_params: &FeatureSearchParams,
515    ) -> Result<GraphStepResult, DeconvolutionError> {
516        let mut fit_accumulator = Vec::new();
517        self.populate_graph(
518            error_tolerance,
519            charge_range,
520            left_search,
521            right_search,
522            search_params,
523        );
524        tracing::debug!(
525            "{} fits in the graph",
526            self.dependency_graph_mut().fit_nodes.len()
527        );
528        self.select_best_disjoint_subgraphs(&mut fit_accumulator)?;
529        Ok(GraphStepResult::new(fit_accumulator))
530    }
531}
532
533pub struct GraphStepResult {
534    pub selected_fits: Vec<FeatureSetFit>,
535}
536
537impl GraphStepResult {
538    pub fn new(selected_fits: Vec<FeatureSetFit>) -> Self {
539        Self {
540            selected_fits,
541        }
542    }
543}