1use std::collections::HashMap;
2
3use identity_hash::BuildIdentityHasher;
4use itertools::Itertools;
5
6use chemical_elements::isotopic_pattern::TheoreticalIsotopicPattern;
7
8use mzdeisotope::{
9 charge::{ChargeIterator, ChargeListIter, ChargeRange},
10 isotopic_model::{isotopic_shift, IsotopicPatternParams},
11 scorer::{ScoreInterpretation, ScoreType},
12};
13use mzpeaks::{
14 coordinate::CoordinateRange,
15 feature::TimeInterval,
16 prelude::*,
17 MZ,
18};
19use thiserror::Error;
20use tracing::debug;
21
22use crate::{
23 dependency_graph::{
24 DependenceCluster, FeatureDependenceGraph, FitKey, FitRef, SubgraphSolverMethod,
25 },
26 fmap::{IndexedFeature, IndexedFeatureMap},
27 FeatureSetFit,
28};
29
30pub(crate) type FeatureType<Y> = IndexedFeature<Y>;
31pub(crate) type FeatureMapType<Y> = IndexedFeatureMap<Y>;
32
33#[derive(Debug, Clone, PartialEq, Error)]
35pub enum DeconvolutionError {
36 #[error("Failed to resolve a deconvolution solution")]
37 FailedToResolveSolution,
38 #[error("Failed to resolve a fit reference {0:?}")]
39 FailedToResolveFit(FitRef),
40}
41
42#[derive(Debug, Clone, Copy)]
43pub struct FeatureSearchParams {
44 pub truncate_after: f64,
45 pub ignore_below: f64,
46 pub max_missed_peaks: usize,
47 pub threshold_scale: f32,
48 pub detection_threshold: f32,
49}
50
51impl Default for FeatureSearchParams {
52 fn default() -> Self {
53 Self {
54 truncate_after: 0.95,
55 ignore_below: 0.05,
56 max_missed_peaks: 1,
57 threshold_scale: 0.3,
58 detection_threshold: 0.1,
59 }
60 }
61}
62
63impl FeatureSearchParams {
64 pub fn new(
65 truncate_after: f64,
66 ignore_below: f64,
67 max_missed_peaks: usize,
68 threshold_scale: f32,
69 detection_threshold: f32,
70 ) -> Self {
71 Self {
72 truncate_after,
73 ignore_below,
74 max_missed_peaks,
75 threshold_scale,
76 detection_threshold,
77 }
78 }
79
80 pub fn as_isotopic_params(&self) -> IsotopicPatternParams {
81 IsotopicPatternParams {
82 incremental_truncation: None,
83 truncate_after: self.truncate_after,
84 ignore_below: self.ignore_below,
85 ..Default::default()
86 }
87 }
88}
89
90pub fn quick_charge_feature<C: FeatureLike<MZ, Y>, Y, const N: usize>(
91 features: &[C],
92 position: usize,
93 charge_range: ChargeRange,
94) -> ChargeListIter {
95 let (min_charge, max_charge) = charge_range;
96 let mut charges = [false; N];
97 if N > 0 {
98 charges[0] = true;
99 }
100 let feature = &features[position];
101 let mut result_size = 0usize;
102 let min_intensity = feature.intensity() / 4.0;
103 let query_time = feature.as_range();
104 let query_mz = feature.mz();
105
106 for other in features
107 .iter()
108 .skip(position + 1)
109 .filter(|f| f.as_range().overlaps(&query_time))
110 {
111 let diff = other.mz() - query_mz;
112 if diff > 1.1 {
113 break;
114 }
115 if other.intensity() < min_intensity {
116 continue;
117 }
118 let raw_charge = 1.0 / diff;
119 let remain = raw_charge - raw_charge.floor();
120 if 0.2 < remain && remain < 0.8 {
121 continue;
122 }
123 let charge = (raw_charge + 0.5) as i32;
124 if charge < min_charge || charge > max_charge {
125 continue;
126 }
127 let idx_z = (charge - 1) as usize;
128 let hit = &mut charges[idx_z];
129 if !*hit {
130 result_size += 1;
131 *hit = true;
132 }
133 }
134
135 let mut result = Vec::with_capacity(result_size);
136 charges.iter().enumerate().for_each(|(j, hit)| {
137 let z = (j + 1) as i32;
138 if *hit && accept_charge(z, &charge_range) {
139 result.push(z)
140 }
141 });
142 result.into()
143}
144
145#[inline(always)]
146fn accept_charge(z: i32, charge_range: &ChargeRange) -> bool {
147 let z = z.abs();
148 charge_range.0.abs() <= z && z <= charge_range.1.abs()
149}
150
151pub fn quick_charge_feature_w<C: FeatureLike<MZ, Y>, Y>(
154 peaks: &[C],
155 position: usize,
156 charge_range: ChargeRange,
157) -> ChargeListIter {
158 macro_rules! match_i {
159 ($($i:literal, )*) => {
160 match charge_range.1 {
161 $($i => quick_charge_feature::<C, Y, $i>(peaks, position, charge_range),)*
162 i if i > 16 && i < 33 => quick_charge_feature::<C, Y, 32>(peaks, position, charge_range),
163 i if i > 32 && i < 65 => quick_charge_feature::<C, Y, 64>(peaks, position, charge_range),
164 i if i > 64 && i < 129 => quick_charge_feature::<C, Y, 128>(peaks, position, charge_range),
165 _ => quick_charge_feature::<C, Y, 256>(peaks, position, charge_range),
166 }
167 };
168 }
169 match_i!(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,)
170}
171
172pub trait FeatureMapMatch<Y> {
173 fn feature_map(&self) -> &FeatureMapType<Y>;
174 fn feature_map_mut(&mut self) -> &mut FeatureMapType<Y>;
175
176 fn find_all_features(
177 &self,
178 mz: f64,
179 error_tolerance: Tolerance,
180 ) -> Vec<(usize, &FeatureType<Y>)> {
181 let indices = self.feature_map().all_indices_for(mz, error_tolerance);
182 indices
183 .into_iter()
184 .map(|i| (i, &self.feature_map()[i]))
185 .collect_vec()
186 }
187
188 fn find_features(
189 &self,
190 mz: f64,
191 error_tolerance: Tolerance,
192 interval: &Option<CoordinateRange<Y>>,
193 ) -> Option<Vec<(usize, &FeatureType<Y>)>> {
194 let f = self.find_all_features(mz, error_tolerance);
195 if f.is_empty() {
196 None
197 } else if let Some(interval) = interval {
198 let search_width = interval.end.unwrap() - interval.start.unwrap();
199 if search_width == 0.0 {
200 return None;
201 }
202 let f: Vec<(usize, &FeatureType<Y>)> = f
203 .into_iter()
204 .filter(|(_, f)| {
205 let t = f.as_range();
206 if t.overlaps(&interval) {
207 ((t.end.unwrap() - t.start.unwrap()) / search_width) >= 0.05
208 } else {
209 false
210 }
211 })
212 .collect();
213 if f.is_empty() {
214 None
215 } else {
216 Some(f)
217 }
218 } else {
219 Some(f.into_iter().collect_vec())
220 }
221 }
222
223 fn match_theoretical_isotopic_distribution(
224 &self,
225 theoretical_distribution: &TheoreticalIsotopicPattern,
226 error_tolerance: Tolerance,
227 interval: &Option<CoordinateRange<Y>>,
228 ) -> Vec<Option<IndexedIsotopicFitFeatureSet<'_, Y>>> {
229 theoretical_distribution
230 .iter()
231 .map(|p| self.find_features(p.mz, error_tolerance, interval))
232 .collect()
233 }
234}
235
236pub type IndexedIsotopicFitFeatureSet<'a, Y> = Vec<(usize, &'a FeatureType<Y>)>;
237
238pub trait FeatureIsotopicFitter<Y>: FeatureMapMatch<Y> {
239 fn fit_theoretical_distribution(
240 &mut self,
241 feature: usize,
242 error_tolerance: Tolerance,
243 charge: i32,
244 left_search: i8,
245 right_search: i8,
246 search_params: &FeatureSearchParams,
247 ) -> Vec<FeatureSetFit> {
248 let (base_mz, time_range) = {
249 let feature = self.feature_map().get_item(feature);
250 let base_mz = feature.mz();
251 let time_range = Some(feature.as_range());
252 (base_mz, time_range)
253 };
254 let mut all_fits = Vec::new();
255 for offset in -left_search..=right_search {
256 let shift = isotopic_shift(charge) * (offset as f64);
257 let mz = base_mz + shift;
258 all_fits.extend(self.fit_feature_set(
259 mz,
260 error_tolerance,
261 charge,
262 search_params,
263 &time_range,
264 ));
265 }
266 all_fits
267 }
268
269 fn make_isotopic_pattern(
270 &mut self,
271 mz: f64,
272 charge: i32,
273 search_params: &FeatureSearchParams,
274 ) -> TheoreticalIsotopicPattern;
275
276 fn fit_feature_set(
277 &mut self,
278 mz: f64,
279 error_tolerance: Tolerance,
280 charge: i32,
281 search_params: &FeatureSearchParams,
282 feature: &Option<CoordinateRange<Y>>,
283 ) -> Vec<FeatureSetFit> {
284 let base_tid = self.make_isotopic_pattern(mz, charge, search_params);
285
286 self.fit_theoretical_distribution_on_features(
287 mz,
288 error_tolerance,
289 charge,
290 base_tid,
291 search_params.max_missed_peaks,
292 search_params.threshold_scale,
293 feature,
294 )
295 }
296
297 #[allow(clippy::too_many_arguments)]
298 fn fit_theoretical_distribution_on_features(
299 &self,
300 mz: f64,
301 error_tolerance: Tolerance,
302 charge: i32,
303 base_tid: TheoreticalIsotopicPattern,
304 max_missed_peaks: usize,
305 threshold_scale: f32,
306 feature: &Option<CoordinateRange<Y>>,
307 ) -> Vec<FeatureSetFit>;
308}
309
310pub trait GraphFeatureDeconvolution<Y>: FeatureIsotopicFitter<Y> {
311 fn score_interpretation(&self) -> ScoreInterpretation;
312
313 fn add_fit_to_graph(&mut self, fit: FeatureSetFit) {
314 self.dependency_graph_mut().add_fit(fit);
315 }
316
317 fn prefer_multiply_charged(&self) -> bool;
318
319 fn skip_feature(&self, feature: &FeatureType<Y>) -> bool;
320
321 fn dependency_graph_mut(&mut self) -> &mut FeatureDependenceGraph;
322
323 fn populate_graph(
324 &mut self,
325 error_tolerance: Tolerance,
326 charge_range: ChargeRange,
327 left_search: i8,
328 right_search: i8,
329 search_params: &FeatureSearchParams,
330 ) -> usize {
331 let n = self.feature_map().len();
332 if n == 0 {
333 return 0;
334 }
335 (0..n)
336 .rev()
337 .map(move |i| {
338 if i % 5000 == 0 {
339 debug!(
340 "Processing feature {}/{n} ({:0.2}%)",
341 (n - i),
342 (n - i) as f32 / n as f32 * 100.0
343 )
344 }
345
346 if self.feature_map()[i].charges().is_none() {
355 let charge_range_of =
356 quick_charge_feature_w(self.feature_map().as_slice(), i, charge_range);
357 let f = &mut self.feature_map_mut()[i];
358 *f.charges_mut() = Some(Vec::from(charge_range_of.clone()).into_boxed_slice());
359 }
360
361 let charge_range_of: ChargeListIter =
362 self.feature_map()[i].charges().unwrap().to_vec().into();
363
364 self.explore_local(
365 i,
366 error_tolerance,
367 charge_range_of,
368 left_search,
369 right_search,
370 search_params,
371 )
372 })
373 .sum()
374 }
375
376 fn explore_local<Z: ChargeIterator>(
377 &mut self,
378 feature: usize,
379 error_tolerance: Tolerance,
380 charge_range: Z,
381 left_search: i8,
382 right_search: i8,
383 search_params: &FeatureSearchParams,
384 ) -> usize {
385 if self.skip_feature(self.feature_map().get_item(feature)) {
386 0
387 } else {
388 self.collect_all_fits(
389 feature,
390 error_tolerance,
391 charge_range,
392 left_search,
393 right_search,
394 search_params,
395 )
396 }
397 }
398
399 fn collect_all_fits<Z: ChargeIterator>(
400 &mut self,
401 feature: usize,
402 error_tolerance: Tolerance,
403 charge_range: Z,
404 left_search: i8,
405 right_search: i8,
406 search_params: &FeatureSearchParams,
407 ) -> usize {
408 let (mut best_fit_score, is_maximizing) = match self.score_interpretation() {
409 ScoreInterpretation::HigherIsBetter => (0.0, true),
410 ScoreInterpretation::LowerIsBetter => (ScoreType::INFINITY, false),
411 };
412 let mut best_fit_charge = 0;
413 let prefer_multiply_charged = self.prefer_multiply_charged();
414 let mut holdout = None;
415 let mut counter = 0;
416
417 for charge in charge_range {
418 let current_fits = self.fit_theoretical_distribution(
419 feature,
420 error_tolerance,
421 charge,
422 left_search,
423 right_search,
424 search_params,
425 );
426
427 let is_multiply_charged = charge.abs() > 1;
428 if is_maximizing {
429 for fit in current_fits.iter() {
430 if fit.score > best_fit_score {
431 if is_multiply_charged && !fit.has_multiple_real_features() {
432 continue;
433 }
434 best_fit_score = fit.score;
435 best_fit_charge = charge.abs();
436 }
437 }
438 } else {
439 for fit in current_fits.iter() {
440 if fit.score < best_fit_score {
441 if is_multiply_charged && !fit.has_multiple_real_features() {
442 continue;
443 }
444 best_fit_score = fit.score;
445 best_fit_charge = charge.abs();
446 }
447 }
448 }
449 if !is_multiply_charged && prefer_multiply_charged {
450 holdout = Some(current_fits);
451 } else {
452 for fit in current_fits {
453 if is_multiply_charged && !fit.has_multiple_real_features() {
454 continue;
455 }
456 counter += 1;
457 if counter % 100 == 0 && counter > 0 {
458 debug!("Added {counter}th solution for feature {feature} to graph");
459 }
460 self.add_fit_to_graph(fit);
461 }
462 }
463 }
464 if holdout.is_some() && best_fit_charge == 1 {
465 for fit in holdout.unwrap() {
466 counter += 1;
467 self.add_fit_to_graph(fit);
468 }
469 }
470 counter
471 }
472
473 fn solve_subgraph_top(
474 &mut self,
475 cluster: DependenceCluster,
476 fits: Vec<(FitRef, FeatureSetFit)>,
477 fit_accumulator: &mut Vec<FeatureSetFit>,
478 ) -> Result<(), DeconvolutionError> {
479 let mut fits: HashMap<FitKey, FeatureSetFit, BuildIdentityHasher<FitKey>> =
480 fits.into_iter().map(|(k, v)| (k.key, v)).collect();
481 for best_fit_key in cluster.iter().take(1) {
482 if let Some(fit) = fits.remove(&best_fit_key.key) {
483 fit_accumulator.push(fit);
484 } else {
485 return Err(DeconvolutionError::FailedToResolveFit(*best_fit_key));
486 }
487 }
488 Ok(())
489 }
490
491 fn select_best_disjoint_subgraphs(
492 &mut self,
493 fit_accumulator: &mut Vec<FeatureSetFit>,
494 ) -> Result<(), DeconvolutionError> {
495 let solutions = self
496 .dependency_graph_mut()
497 .solutions(SubgraphSolverMethod::Greedy);
498 debug!("{} distinct solution clusters", solutions.len());
499 let res: Result<(), DeconvolutionError> = solutions
500 .into_iter()
501 .try_for_each(|(cluster, fits)| {
502 self.solve_subgraph_top(cluster, fits, fit_accumulator)?;
503 Ok(())
504 });
505 res
506 }
507
508 fn graph_step_deconvolve(
509 &mut self,
510 error_tolerance: Tolerance,
511 charge_range: ChargeRange,
512 left_search: i8,
513 right_search: i8,
514 search_params: &FeatureSearchParams,
515 ) -> Result<GraphStepResult, DeconvolutionError> {
516 let mut fit_accumulator = Vec::new();
517 self.populate_graph(
518 error_tolerance,
519 charge_range,
520 left_search,
521 right_search,
522 search_params,
523 );
524 tracing::debug!(
525 "{} fits in the graph",
526 self.dependency_graph_mut().fit_nodes.len()
527 );
528 self.select_best_disjoint_subgraphs(&mut fit_accumulator)?;
529 Ok(GraphStepResult::new(fit_accumulator))
530 }
531}
532
533pub struct GraphStepResult {
534 pub selected_fits: Vec<FeatureSetFit>,
535}
536
537impl GraphStepResult {
538 pub fn new(selected_fits: Vec<FeatureSetFit>) -> Self {
539 Self {
540 selected_fits,
541 }
542 }
543}