1use std::{collections::HashSet, mem};
2
3use chemical_elements::{isotopic_pattern::TheoreticalIsotopicPattern, neutral_mass};
4
5use identity_hash::BuildIdentityHasher;
6use itertools::Itertools;
7use mzpeaks::{
8 coordinate::{BoundingBox, CoordinateRange, QuadTree, Span1D},
9 feature::{ChargedFeature, Feature, TimeInterval},
10 feature_map::FeatureMap,
11 prelude::*,
12 CoordinateLikeMut, Mass, MZ,
13};
14
15use mzdeisotope::{
16 charge::ChargeRange,
17 isotopic_model::{
18 IsotopicPatternGenerator, TheoreticalIsotopicDistributionScalingMethod, PROTON,
19 },
20 scorer::{IsotopicFitFilter, IsotopicPatternScorer, ScoreInterpretation, ScoreType},
21};
22use mzsignal::feature_mapping::graph::FeatureGraphBuilder;
23use tracing::{debug, trace};
24
25use crate::{
26 dependency_graph::FeatureDependenceGraph, feature_fit::{FeatureSetFit, MapCoordinate}, fmap::IndexedFeature, solution::{reflow_feature, DeconvolvedSolutionFeature, FeatureMerger, MZPointSeries}, traits::{
27 DeconvolutionError, FeatureIsotopicFitter, FeatureMapMatch, FeatureMapType,
28 FeatureSearchParams, FeatureType, GraphFeatureDeconvolution, GraphStepResult,
29 }, FeatureSetIter
30};
31
/// Reconciles an experimental isotopic envelope against a theoretical pattern,
/// filling unobserved isotopic positions with unit-intensity placeholder peaks
/// and rescaling the theoretical pattern to the observed total intensity.
#[derive(Debug)]
pub struct EnvelopeConformer {
    /// Theoretical abundance above which an unobserved position is counted as
    /// a genuinely "missing" peak (presumably a fraction of total pattern
    /// intensity — TODO confirm against `TheoreticalIsotopicPattern`'s scale).
    pub minimum_theoretical_abundance: f64,
}
36
37impl EnvelopeConformer {
38 pub fn new(minimum_theoretical_abundance: f64) -> Self {
39 Self {
40 minimum_theoretical_abundance,
41 }
42 }
43
44 pub fn conform_into<
45 C: CentroidLike + Default + CoordinateLikeMut<MZ> + IntensityMeasurementMut,
46 >(
47 &self,
48 experimental: Vec<Option<C>>,
49 theoretical: &mut TheoreticalIsotopicPattern,
50 cleaned_eid: &mut Vec<C>,
51 ) -> usize {
52 let mut n_missing: usize = 0;
53 let mut total_intensity = 0.0f32;
54
55 for (observed_peak, peak) in experimental.into_iter().zip(theoretical.iter()) {
56 if observed_peak.is_none() {
57 let mut templated_peak = C::default();
58 *templated_peak.coordinate_mut() = peak.mz();
59 *templated_peak.intensity_mut() = 1.0;
60 if peak.intensity > self.minimum_theoretical_abundance {
61 n_missing += 1;
62 }
63 total_intensity += 1.0;
64 cleaned_eid.push(templated_peak)
65 } else {
66 let observed_peak = observed_peak.unwrap();
67 total_intensity += observed_peak.intensity();
68 cleaned_eid.push(observed_peak)
69 }
70 }
71
72 let total_intensity = total_intensity as f64;
73 theoretical.iter_mut().for_each(|p| {
74 p.intensity *= total_intensity;
75 });
76 n_missing
77 }
78
79 pub fn conform<C: CentroidLike + Default + CoordinateLikeMut<MZ> + IntensityMeasurementMut>(
80 &self,
81 experimental: Vec<Option<C>>,
82 theoretical: &mut TheoreticalIsotopicPattern,
83 ) -> (Vec<C>, usize) {
84 let mut cleaned_eid = Vec::with_capacity(experimental.len());
85 let n_missing = self.conform_into(experimental, theoretical, &mut cleaned_eid);
86 (cleaned_eid, n_missing)
87 }
88}
89
/// Upper bound on the number of feature combinations evaluated per candidate
/// (m/z, charge) pair in `fit_theoretical_distribution_on_features`, keeping
/// the cartesian product of per-isotope matches from exploding.
const MAX_COMBINATIONS: usize = 1000;
91
/// Builder for [`FeatureProcessor`]. The isotopic model, scorer, and fit
/// filter are required; `build` panics if any of them is unset.
#[derive(Debug)]
pub struct FeatureProcessorBuilder<
    Y: Clone + Default,
    I: IsotopicPatternGenerator,
    S: IsotopicPatternScorer,
    F: IsotopicFitFilter,
> {
    /// The m/z-over-Y feature map to deconvolve.
    pub feature_map: FeatureMap<MZ, Y, Feature<MZ, Y>>,
    /// Required: generator of theoretical isotopic patterns.
    pub isotopic_model: Option<I>,
    /// Required: scorer comparing experimental vs. theoretical envelopes.
    pub scorer: Option<S>,
    /// Required: filter deciding which fit scores are acceptable.
    pub fit_filter: Option<F>,
    /// How theoretical distributions are scaled onto observed data.
    pub scaling_method: TheoreticalIsotopicDistributionScalingMethod,
    /// Presumably biases solution selection toward multiply-charged fits —
    /// confirm in the `GraphFeatureDeconvolution` implementation.
    pub prefer_multiply_charged: bool,
    /// Minimum number of points for a feature to be fit or retained.
    pub minimum_size: usize,
    /// Maximum time gap before a solution feature is split (units of Y axis).
    pub maximum_time_gap: f64,
    /// Intensity floor used when pruning dead points from features.
    pub minimum_intensity: f32,
}
109
110impl<
111 Y: Clone + Default,
112 I: IsotopicPatternGenerator,
113 S: IsotopicPatternScorer,
114 F: IsotopicFitFilter,
115 > FeatureProcessorBuilder<Y, I, S, F>
116{
117 pub fn feature_map(&mut self, feature_map: FeatureMap<MZ, Y, Feature<MZ, Y>>) -> &mut Self {
118 self.feature_map = feature_map;
119 self
120 }
121
122 pub fn isotopic_model(&mut self, isotopic_model: I) -> &mut Self {
123 self.isotopic_model = Some(isotopic_model);
124 self
125 }
126
127 pub fn fit_filter(&mut self, fit_filter: F) -> &mut Self {
128 self.fit_filter = Some(fit_filter);
129 self
130 }
131
132 pub fn scorer(&mut self, scorer: S) -> &mut Self {
133 self.scorer = Some(scorer);
134 self
135 }
136
137 pub fn build(self) -> FeatureProcessor<Y, I, S, F> {
138 FeatureProcessor::new(
139 self.feature_map,
140 self.isotopic_model.unwrap(),
141 self.scorer.unwrap(),
142 self.fit_filter.unwrap(),
143 self.minimum_size,
144 self.maximum_time_gap,
145 self.minimum_intensity,
146 self.prefer_multiply_charged,
147 )
148 }
149}
150
impl<
        Y: Clone + Default,
        I: IsotopicPatternGenerator,
        S: IsotopicPatternScorer,
        F: IsotopicFitFilter,
    > Default for FeatureProcessorBuilder<Y, I, S, F>
{
    /// Default configuration: no model/scorer/filter yet (they must be
    /// supplied via the setters before `build`), multiply-charged fits
    /// preferred, features of at least 3 points, a maximum time gap of 0.25,
    /// and an intensity floor of 5.0.
    fn default() -> Self {
        Self {
            feature_map: Default::default(),
            isotopic_model: Default::default(),
            scorer: Default::default(),
            fit_filter: Default::default(),
            scaling_method: Default::default(),
            prefer_multiply_charged: true,
            minimum_size: 3,
            maximum_time_gap: 0.25,
            minimum_intensity: 5.0,
        }
    }
}
172
/// Deconvolutes isotopic patterns over an m/z-by-Y feature map, producing
/// neutral-mass [`DeconvolvedSolutionFeature`]s while subtracting explained
/// signal from the working map between iterations.
#[derive(Debug)]
pub struct FeatureProcessor<
    Y: Clone + Default,
    I: IsotopicPatternGenerator,
    S: IsotopicPatternScorer,
    F: IsotopicFitFilter,
> {
    /// The working (residual) feature map; mutated as fits are subtracted.
    pub feature_map: FeatureMapType<Y>,
    /// Generator of theoretical isotopic patterns.
    pub isotopic_model: I,
    /// Scorer comparing experimental vs. theoretical envelopes.
    pub scorer: S,
    /// Filter deciding which fit scores are acceptable.
    pub fit_filter: F,
    /// How theoretical distributions are scaled onto observed data.
    pub scaling_method: TheoreticalIsotopicDistributionScalingMethod,
    /// Presumably biases solution selection toward multiply-charged fits.
    pub prefer_multiply_charged: bool,
    /// Minimum number of points for a feature to be fit or retained.
    pub minimum_size: usize,
    /// Maximum time gap before a solution feature is split.
    pub maximum_time_gap: f64,
    /// Intensity floor for pruning dead points (clamped to >= 1.001 in `new`).
    pub minimum_intensity: f32,
    /// Scratch storage recycled by `remove_dead_points` to limit reallocation.
    feature_buffer: Vec<IndexedFeature<Y>>,
    /// Fills unobserved isotopic positions with placeholder peaks.
    envelope_conformer: EnvelopeConformer,
    /// Records competing fits sharing features so a consistent subset can be
    /// selected during graph deconvolution.
    dependency_graph: FeatureDependenceGraph,
}
193
impl<
        Y: Clone + Default,
        I: IsotopicPatternGenerator,
        S: IsotopicPatternScorer,
        F: IsotopicFitFilter,
    > FeatureIsotopicFitter<Y> for FeatureProcessor<Y, I, S, F>
{
    /// Generate a theoretical isotopic pattern for `mz` at `charge`, truncated
    /// and thresholded according to `search_params`.
    fn make_isotopic_pattern(
        &mut self,
        mz: f64,
        charge: i32,
        search_params: &FeatureSearchParams,
    ) -> TheoreticalIsotopicPattern {
        self.isotopic_model.isotopic_cluster(
            mz,
            charge,
            PROTON,
            search_params.truncate_after,
            search_params.ignore_below,
        )
    }

    /// Score `base_tid` against combinations of map features matching its
    /// theoretical peak positions within `error_tolerance`, optionally
    /// restricted to the time span `feature`.
    ///
    /// For each combination (capped at `MAX_COMBINATIONS`) the envelope is
    /// re-scored at every shared time point; per-timepoint scores are reduced
    /// via `find_thresholded_score` and combinations surviving the fit filter
    /// are returned as [`FeatureSetFit`]s.
    fn fit_theoretical_distribution_on_features(
        &self,
        mz: f64,
        error_tolerance: Tolerance,
        charge: i32,
        base_tid: TheoreticalIsotopicPattern,
        max_missed_peaks: usize,
        threshold_scale: f32,
        feature: &Option<CoordinateRange<Y>>,
    ) -> Vec<FeatureSetFit> {
        // One group of candidate features per theoretical peak position
        // (None where nothing in the map matched that slot).
        let feature_groups =
            self.match_theoretical_isotopic_distribution(&base_tid, error_tolerance, feature);
        let n_real = feature_groups
            .iter()
            .flatten()
            .map(|s| s.len())
            .sum::<usize>();

        if n_real == 0 {
            return Vec::new();
        }
        // Enumerate the cartesian product of candidates across slots; an
        // unmatched slot contributes a single `None` so the product still
        // covers the full pattern width.
        let fgi = feature_groups
            .into_iter()
            .map(|g| {
                if g.is_none() {
                    vec![None].into_iter()
                } else {
                    let v: Vec<_> = g.unwrap().into_iter().map(Some).collect();
                    v.into_iter()
                }
            })
            .multi_cartesian_product()
            .enumerate();
        let mut snapped_tid: TheoreticalIsotopicPattern;
        let mut fits = Vec::new();
        for (i, features) in fgi.take(MAX_COMBINATIONS) {
            if i % 100 == 0 && i > 0 {
                debug!("... Considering combination {i} of {n_real} features for {mz}@{charge}");
            }

            // A combination with no real features carries no information.
            if features.iter().all(|f| f.is_none()) {
                continue;
            }

            // "Snap" the theoretical pattern onto the monoisotopic feature's
            // observed m/z when the first slot is occupied.
            if let Some((_, f)) = features.first().unwrap() {
                snapped_tid = base_tid.clone().shift(mz - f.mz());
            } else {
                snapped_tid = base_tid.clone();
            };

            let mut counter = 0;
            let mut score_max: ScoreType = 0.0;

            let mut score_vec: Vec<ScoreType> = Vec::new();
            let mut time_vec: Vec<f64> = Vec::new();

            // Split each (index, feature) candidate into parallel index and
            // feature-reference vectors, preserving slot order.
            let mut features_vec: Vec<_> = Vec::with_capacity(features.len());
            let mut indices_vec: Vec<_> = Vec::with_capacity(features.len());
            for opt in features.into_iter() {
                if let Some((j, f)) = opt {
                    indices_vec.push(Some(j));
                    features_vec.push(Some(f.as_ref()));
                } else {
                    features_vec.push(None);
                    indices_vec.push(None);
                }
            }

            // Walk the features in lock-step over time, scoring the envelope
            // at every shared time point.
            let it = FeatureSetIter::new(&features_vec);
            let mut cleaned_eid = Vec::with_capacity(snapped_tid.len());
            let mut tid = snapped_tid.clone();
            for (time, eid) in it {
                counter += 1;
                if counter % 500 == 0 && counter > 0 {
                    debug!("Step {counter} at {time} for feature combination {i} of {mz}@{charge}");
                }
                // Reset the working pattern; `conform_into` rescales it.
                tid.clone_from(&snapped_tid);

                cleaned_eid.clear();
                let n_missing =
                    self.envelope_conformer
                        .conform_into(eid, &mut tid, &mut cleaned_eid);
                if n_missing > max_missed_peaks {
                    continue;
                }

                let score = self.scorer.score(&cleaned_eid, &tid);
                if score.is_nan() {
                    continue;
                }
                score_max = score_max.max(score);
                score_vec.push(score);
                time_vec.push(time);
            }

            if score_vec.is_empty() {
                continue;
            }

            // Average the per-timepoint scores that clear a fraction of the
            // best score, damping the influence of weak time points.
            let final_score = self.find_thresholded_score(&score_vec, score_max, threshold_scale);

            if !self.fit_filter.test_score(final_score) {
                continue;
            }

            let missing_features: usize = features_vec.iter().map(|f| f.is_none() as usize).sum();

            // Map extent is taken from the first present feature's first and
            // last points.
            let (start, end) = features_vec
                .iter()
                .flatten()
                .map(|f| {
                    let pt = f.iter().next().unwrap();
                    let start = MapCoordinate::new(pt.0, pt.1);
                    let pt = f.iter().last().unwrap();
                    let end = MapCoordinate::new(pt.0, pt.1);
                    (start, end)
                })
                .next()
                .unwrap();

            let neutral_mass = neutral_mass(snapped_tid.origin, charge, PROTON);
            let fit = FeatureSetFit::new(
                indices_vec,
                snapped_tid,
                start,
                end,
                final_score,
                charge,
                missing_features,
                neutral_mass,
                counter,
                score_vec,
                time_vec,
            );

            fits.push(fit);
        }

        fits
    }
}
357
impl<
        Y: Clone + Default,
        I: IsotopicPatternGenerator,
        S: IsotopicPatternScorer,
        F: IsotopicFitFilter,
    > FeatureMapMatch<Y> for FeatureProcessor<Y, I, S, F>
{
    /// Borrow the working (residual) feature map used for matching.
    #[inline(always)]
    fn feature_map(&self) -> &FeatureMapType<Y> {
        &self.feature_map
    }

    /// Mutably borrow the working feature map.
    fn feature_map_mut(&mut self) -> &mut FeatureMapType<Y> {
        &mut self.feature_map
    }
}
374
375impl<
376 Y: Clone + Default,
377 I: IsotopicPatternGenerator,
378 S: IsotopicPatternScorer,
379 F: IsotopicFitFilter,
380 > GraphFeatureDeconvolution<Y> for FeatureProcessor<Y, I, S, F>
381{
382 #[inline(always)]
383 fn score_interpretation(&self) -> ScoreInterpretation {
384 self.scorer.interpretation()
385 }
386
387 #[inline(always)]
388 fn add_fit_to_graph(&mut self, fit: FeatureSetFit) {
389 self.dependency_graph.add_fit(fit)
390 }
391
392 #[inline(always)]
393 fn prefer_multiply_charged(&self) -> bool {
394 self.prefer_multiply_charged
395 }
396
397 fn skip_feature(&self, feature: &FeatureType<Y>) -> bool {
398 if self.minimum_size > feature.len() {
399 debug!(
400 "Skipping feature {} with {} points",
401 feature.mz(),
402 feature.len()
403 );
404 true
405 } else {
406 false
407 }
408 }
409
410 fn dependency_graph_mut(&mut self) -> &mut FeatureDependenceGraph {
411 &mut self.dependency_graph
412 }
413}
414
/// A set of feature indices using an identity hasher — the keys are already
/// well-distributed `usize` indices, so hashing them again buys nothing.
pub(crate) type IndexSet = HashSet<usize, BuildIdentityHasher<usize>>;
416
417impl<
418 Y: Clone + Default,
419 I: IsotopicPatternGenerator,
420 S: IsotopicPatternScorer,
421 F: IsotopicFitFilter,
422 > FeatureProcessor<Y, I, S, F>
423{
    /// Construct a new processor over `feature_map`.
    ///
    /// `minimum_intensity` is clamped to at least 1.001 so that points masked
    /// to 1.0 by `mask_features_at` always fall at or below the pruning floor
    /// used in `remove_dead_points` (`cur_int <= minimum_intensity`).
    #[allow(clippy::too_many_arguments)]
    pub fn new(
        feature_map: FeatureMap<MZ, Y, Feature<MZ, Y>>,
        isotopic_model: I,
        scorer: S,
        fit_filter: F,
        minimum_size: usize,
        maximum_time_gap: f64,
        minimum_intensity: f32,
        prefer_multiply_charged: bool,
    ) -> Self {
        // The dependency graph must agree with the scorer about whether
        // higher or lower scores win.
        let dependency_graph = FeatureDependenceGraph::new(scorer.interpretation());
        Self {
            feature_map: feature_map.into_iter().map(FeatureType::from).collect(),
            isotopic_model,
            scorer,
            fit_filter,
            scaling_method: TheoreticalIsotopicDistributionScalingMethod::default(),
            // 5% theoretical abundance threshold for counting missing peaks.
            envelope_conformer: EnvelopeConformer::new(0.05),
            minimum_size,
            maximum_time_gap,
            // Keep the floor above 1.0 so masked (unit-intensity) points are
            // always prunable.
            minimum_intensity: minimum_intensity.max(1.001),
            prefer_multiply_charged,
            dependency_graph,
            feature_buffer: Vec::new(),
        }
    }
451
    /// Create a [`FeatureProcessorBuilder`] with default settings.
    pub fn builder() -> FeatureProcessorBuilder<Y, I, S, F> {
        FeatureProcessorBuilder::default()
    }
455
456 fn find_thresholded_score(
457 &self,
458 scores: &[ScoreType],
459 maximum_score: ScoreType,
460 percentage: ScoreType,
461 ) -> ScoreType {
462 let threshold = maximum_score * percentage;
463 let (acc, count) = scores.iter().fold((0.0, 0usize), |(total, count), val| {
464 if *val > threshold {
465 (total + val, count + 1)
466 } else {
467 (total, count)
468 }
469 });
470 if count == 0 {
471 0.0
472 } else {
473 acc / count as ScoreType
474 }
475 }
476
    /// Convert an accepted [`FeatureSetFit`] into a deconvolved solution
    /// feature, accumulating the per-isotope envelopes and computing the
    /// residual intensity left behind in each contributing map feature
    /// (consumed afterwards by `do_subtraction`).
    pub fn finalize_fit(
        &mut self,
        fit: &FeatureSetFit,
        detection_threshold: f32,
        max_missed_peaks: usize,
    ) -> DeconvolvedSolutionFeature<Y> {
        // Restrict to the time span where the fit is actually detected.
        let (time_range, _segments) = fit.find_separation(&self.feature_map, detection_threshold);
        let features: Vec<_> = fit
            .features
            .iter()
            .map(|i| i.map(|i| self.feature_map.get_item(i).as_ref()))
            .collect();
        let feat_iter = FeatureSetIter::new_with_time_interval(
            &features,
            time_range.start().unwrap(),
            time_range.end().unwrap(),
        );

        let base_tid = &fit.theoretical;
        let charge = fit.charge;
        let abs_charge = charge.abs();

        let mut scores = Vec::new();

        // One envelope trace and one residual series per theoretical isotope.
        let mut envelope_features = Vec::with_capacity(base_tid.len());
        let mut residuals = Vec::with_capacity(base_tid.len());

        for _ in base_tid.iter() {
            envelope_features.push(MZPointSeries::default());
            residuals.push(Vec::new());
        }

        let mut deconv_feature = ChargedFeature::empty(charge);
        for (time, eid) in feat_iter {
            let mut tid = base_tid.clone();
            let (cleaned_eid, n_missing) = self.envelope_conformer.conform(eid, &mut tid);
            let n_real_peaks = cleaned_eid.len() - n_missing;
            // Skip time points with no usable signal: nothing real, a single
            // real peak on a multiply-charged fit, or too many missing peaks.
            if n_real_peaks == 0
                || (n_real_peaks == 1 && abs_charge > 1)
                || n_missing > max_missed_peaks
            {
                continue;
            }

            let score = self.scorer.score(&cleaned_eid, &tid);
            if score < 0.0 || score.is_nan() {
                continue;
            }
            scores.push(score);

            let mut total_intensity = 0.0;
            cleaned_eid
                .iter()
                .zip(tid.iter())
                .enumerate()
                .for_each(|(i, (e, t))| {
                    // Claim no more than the theoretical expectation.
                    let intens = e.intensity().min(t.intensity());
                    total_intensity += intens;

                    envelope_features
                        .get_mut(i)
                        .unwrap()
                        .push_raw(e.mz(), intens);

                    // Residual = what remains in the map point afterwards,
                    // floored at 1.0; near-fully-explained points are dropped
                    // to the floor outright.
                    if e.intensity() * 0.7 < t.intensity() {
                        residuals[i].push(1.0);
                    } else {
                        residuals[i].push((e.intensity() - intens).max(1.0));
                    }
                    if tracing::enabled!(tracing::Level::TRACE) {
                        trace!(
                            "Resid({}): at time {time} slot {i} e:{}/t:{} -> {intens} with residual {}",
                            fit.features[i].map(|x| x.to_string()).unwrap_or("?".to_string()),
                            e.intensity(),
                            t.intensity(),
                            *residuals[i].last().unwrap()
                        );
                    }
                    // A residual may never exceed the observed intensity.
                    debug_assert!(
                        e.intensity() >= *residuals[i].last().unwrap(),
                        "{} < {}",
                        e.intensity(),
                        *residuals[i].last().unwrap()
                    );
                });
            let neutral_mass = neutral_mass(tid[0].mz, charge, PROTON);
            deconv_feature.push_raw(neutral_mass, time, total_intensity);
        }

        // Release the immutable borrows of `self.feature_map` before the
        // mutable subtraction pass below.
        drop(features);

        self.do_subtraction(fit, &residuals, &deconv_feature);

        DeconvolvedSolutionFeature::new(
            deconv_feature,
            fit.score,
            scores,
            envelope_features.into_boxed_slice(),
        )
    }
583
    /// Subtract the explained signal of a finalized fit from the map features
    /// it drew upon, writing each time point's residual intensity back into
    /// the matching point of the contributing feature.
    fn do_subtraction(
        &mut self,
        fit: &FeatureSetFit,
        residuals: &[Vec<f32>],
        deconv_feature: &ChargedFeature<Mass, Y>,
    ) {
        for (i, fidx) in fit.features.iter().enumerate() {
            // Slots with no matched feature have nothing to subtract from.
            if fidx.is_none() {
                continue;
            }
            let fidx = fidx.unwrap();

            let tstart = self.feature_map[fidx].start_time().unwrap();
            let tend = self.feature_map[fidx].end_time().unwrap();

            let feature_to_reduce = &mut self.feature_map[fidx];
            let n_of = feature_to_reduce.len();
            let residual_intensity = &residuals[i];
            let n_residual = residual_intensity.len();
            let mz_of = feature_to_reduce.mz();

            // The deconvolved feature's time axis and the residual series were
            // built in lock-step, so zip them back together here.
            for (time, res_int) in deconv_feature
                .iter_time()
                .zip(residual_intensity.iter().copied())
            {
                if let (Some(j), terr) = feature_to_reduce.find_time(time) {
                    if let Some((mz_at, time_at, int_at)) = feature_to_reduce.at_mut(j) {
                        // Only update when the located point is genuinely at
                        // this time; otherwise just log the mismatch.
                        if terr.abs() > 1e-3 {
                            let terr = time_at - time;
                            trace!(
                                "Did not find a coordinate {mz_of} for {time} ({time_at} {terr} {j}) in {i} ({tstart:0.3}-{tend:0.3})",
                            );
                        } else {
                            trace!(
                                "Residual({fidx}) {int_at} => {res_int} @ {mz_at}|{time}|{time_at} {n_residual}/{n_of}",
                            );
                            // Never raise a point's intensity via subtraction.
                            *int_at = res_int.min(*int_at);
                        }
                    } else {
                        debug!("{i} unable to update {time}");
                    }
                } else {
                    debug!("{i} unable to update {time}");
                }
            }

            // Cached aggregates (e.g. summary statistics) are now stale.
            feature_to_reduce.invalidate();
        }
    }
636
637 fn find_unused_features(
638 &self,
639 fits: &[FeatureSetFit],
640 min_width_mz: f64,
641 min_width_time: f64,
642 ) -> Vec<usize> {
643 let quads: QuadTree<f64, f64, BoundingBox<f64, f64>> = fits
644 .iter()
645 .map(|f| {
646 BoundingBox::new(
647 (f.start.coord - min_width_mz, f.start.time - min_width_time),
648 (f.end.coord + min_width_mz, f.end.time + min_width_time),
649 )
650 })
651 .collect();
652 self.feature_map
653 .iter()
654 .enumerate()
655 .filter(|(_, f)| {
656 let bb_f = BoundingBox::new(
657 (f.mz(), f.start_time().unwrap()),
658 (f.mz(), f.end_time().unwrap()),
659 );
660 quads.overlaps(&bb_f).is_empty()
661 })
662 .map(|(i, _)| i)
663 .collect()
664 }
665
666 fn mask_features_at(&mut self, indices_to_mask: &[usize]) {
667 for i in indices_to_mask.iter().copied() {
668 let f = &mut self.feature_map[i];
669 for (_, _, z) in f.iter_mut() {
670 *z = 1.0;
671 }
672 }
673 }
674
    /// Rebuild the feature map, splitting features wherever a point falls at
    /// or below `minimum_intensity` and discarding segments shorter than
    /// `minimum_size`. When `indices_to_mask` is given, only those features
    /// are re-split; all others are kept (subject to the size floor).
    ///
    /// Recycles `self.feature_buffer` and the drained inner vector to avoid
    /// reallocating on every iteration of `deconvolve`.
    fn remove_dead_points(&mut self, indices_to_mask: Option<&IndexSet>) {
        let n_before = self.feature_map.len();
        let n_points_before: usize = self.feature_map.iter().map(|f| f.len()).sum();

        // Take ownership of the current map, leaving an empty placeholder.
        let mut tmp = FeatureMapType::empty();

        mem::swap(&mut tmp, &mut self.feature_map);

        // Ensure the recycled accumulator can hold everything up front.
        if self.feature_buffer.capacity() < tmp.len() {
            self.feature_buffer.reserve(tmp.len() - self.feature_buffer.capacity());
        }
        let mut features_acc = mem::take(&mut self.feature_buffer);

        let mut inner: Vec<_> = tmp.into_inner().into_inner();
        for (i, f) in inner.drain(..).enumerate() {
            // With no mask, every feature is processed.
            let process = indices_to_mask
                .map(|mask| mask.contains(&i))
                .unwrap_or(true);
            if process {
                // Break the feature at low-intensity points, keeping only
                // segments that are long enough.
                let parts: Vec<_> = f
                    .feature
                    .split_when(|_, (_, _, cur_int)| cur_int <= self.minimum_intensity)
                    .into_iter()
                    .filter(|s| s.len() >= self.minimum_size)
                    .collect();
                if parts.len() == 1 {
                    if parts[0].len() == f.len() {
                        // Untouched: reuse the original allocation.
                        features_acc.push(f)
                    } else {
                        let p: FeatureType<Y> = parts[0].to_owned().into();
                        features_acc.push(p)
                    }
                } else {
                    for s in parts {
                        let p: FeatureType<Y> = s.to_owned().into();
                        features_acc.push(p);
                    }
                }
            } else if f.len() >= self.minimum_size {
                features_acc.push(f);
            }
        }
        // Keep the drained (empty) vector as next iteration's scratch buffer.
        self.feature_buffer = inner;
        self.feature_map = FeatureMapType::new(features_acc);
        let n_after = self.feature_map.len();
        let n_points_after: usize = self.feature_map.iter().map(|f| f.len()).sum();
        debug!("{n_before} features, {n_points_before} points before, {n_after} features, {n_points_after} points after");
    }
723
    /// Iteratively deconvolve the feature map into neutral-mass features.
    ///
    /// Each iteration: prune dead points, run graph-based fit selection,
    /// finalize the selected fits (subtracting their explained signal from the
    /// working map), and check whether the remaining total ion current (TIC)
    /// has stopped shrinking. Stops when the relative TIC change drops to
    /// `convergence`, no new features are produced, or `max_iterations` is
    /// reached. Surviving solutions are filtered by score, gap-bridged, and
    /// reflowed before being returned.
    ///
    /// # Errors
    /// Propagates any [`DeconvolutionError`] from the graph step.
    #[allow(clippy::too_many_arguments)]
    pub fn deconvolve(
        &mut self,
        error_tolerance: Tolerance,
        charge_range: ChargeRange,
        left_search_limit: i8,
        right_search_limit: i8,
        search_params: &FeatureSearchParams,
        convergence: f32,
        max_iterations: u32,
    ) -> Result<FeatureMap<Mass, Y, DeconvolvedSolutionFeature<Y>>, DeconvolutionError> {
        let mut before_tic: f32 = self.feature_map.iter().map(|f| f.total_intensity()).sum();
        let ref_tic = before_tic;
        if ref_tic == 0.0 || self.feature_map.is_empty() {
            debug!("The TIC of the feature map was zero or the feature map was empty. Skipping processing.");
            return Ok(Default::default());
        }
        let mut deconvoluted_features = Vec::new();
        let mut converged = false;
        let mut convergence_check = f32::MAX;

        // Features touched this iteration; only these need re-splitting next
        // time around.
        let mut indices_modified = IndexSet::default();

        for i in 0..max_iterations {
            if i > 0 {
                self.remove_dead_points(Some(&indices_modified));
                indices_modified.clear();
            } else {
                // First pass: no prior subtraction, prune everything once.
                self.remove_dead_points(None);
            }
            debug!(
                "Starting iteration {i} with remaining TIC {before_tic:0.4e} ({:0.3}%), {} feature fit",
                before_tic / ref_tic * 100.0,
                deconvoluted_features.len()
            );

            let GraphStepResult {
                selected_fits: fits,
            } = self.graph_step_deconvolve(
                error_tolerance,
                charge_range,
                left_search_limit,
                right_search_limit,
                search_params,
            )?;

            debug!("Found {} fits", fits.len());

            let minimum_size = self.minimum_size;
            let max_gap_size = self.maximum_time_gap;
            let n_before = deconvoluted_features.len();
            // Finalize each sufficiently long fit, record which map features
            // it consumed, then split sparse solutions and drop short pieces.
            deconvoluted_features.extend(
                fits.iter()
                    .filter(|fit| fit.n_points >= minimum_size)
                    .map(|fit| {
                        let solution = self.finalize_fit(
                            fit,
                            search_params.detection_threshold,
                            search_params.max_missed_peaks,
                        );
                        indices_modified.extend(fit.features.iter().flatten().copied());

                        solution
                    })
                    .filter(|f| !f.is_empty())
                    .flat_map(|f| f.split_sparse(max_gap_size))
                    .filter(|fit| fit.len() >= minimum_size),
            );

            let n_new_features = deconvoluted_features.len() - n_before;

            if n_new_features == 0 {
                debug!("No new features were extracted on iteration {i} with remaining TIC {before_tic:0.4e}, {n_before} features fit");
                break;
            }

            if i == 0 {
                // After the first pass, mask features no fit could ever use so
                // they are pruned and never reconsidered.
                let min_width_mz = self.isotopic_model.largest_isotopic_width();
                let indices_to_mask = self.find_unused_features(&fits, min_width_mz, max_gap_size);
                debug!("{} features unused", indices_to_mask.len());
                self.mask_features_at(&indices_to_mask);
                indices_modified.extend(indices_to_mask);
            }

            let after_tic = self
                .feature_map
                .iter()
                .map(|f| f.as_ref().total_intensity())
                .sum();

            // Relative TIC reduction achieved by this iteration.
            convergence_check = (before_tic - after_tic) / after_tic;
            if convergence_check <= convergence {
                debug!(
                    "Converged on iteration {i} with remaining TIC {before_tic:0.4e} - {after_tic:0.4e} = {:0.4e} ({convergence_check}), {} features fit",
                    before_tic - after_tic,
                    deconvoluted_features.len()
                );
                converged = true;
                break;
            } else {
                before_tic = after_tic;
            }
            // Clear fit dependencies before the next round of candidates.
            self.dependency_graph.reset();
        }
        if !converged && !deconvoluted_features.is_empty() {
            debug!(
                "Failed to converge after {max_iterations} iterations with remaining TIC {before_tic:0.4e} ({convergence_check}), {} features fit",
                deconvoluted_features.len()
            );
        }

        let map: FeatureMap<_, _, _> = deconvoluted_features
            .into_iter()
            .filter(|f| self.fit_filter.test_score(f.score))
            .collect();

        if map.is_empty() {
            return Ok(map);
        }

        // Stitch together solutions separated by small gaps, then rebuild
        // each feature's profile.
        debug!("Building merge graph over {} features", map.len());
        let merger = FeatureMerger::<Y>::default();
        let map_merged = merger
            .bridge_feature_gaps(map, Tolerance::PPM(2.0), self.maximum_time_gap)
            .features
            .into_iter()
            .map(reflow_feature)
            .collect();
        Ok(map_merged)
    }
854}