1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
//! Re-bin a single spectrum, or average together multiple spectra using
//! interpolation.
//!
use std::borrow::Cow;
use std::cmp;
use std::collections::VecDeque;
use std::ops::{Add, Index};

#[cfg(feature = "parallelism")]
use rayon::prelude::*;
#[cfg(feature = "parallelism")]
use std::sync::Mutex;

use cfg_if;

use crate::arrayops::{gridspace, ArrayPair, MZGrid};
use crate::search;
use num_traits::{Float, ToPrimitive};

/// A linear interpolation spectrum intensity averager over a shared m/z axis.
#[derive(Debug, Default, Clone)]
pub struct SignalAverager<'lifespan> {
    /// The evenly spaced m/z axis over which spectra are averaged.
    pub mz_grid: Vec<f64>,
    /// The lowest m/z in the spectrum. If an input spectrum has lower m/z values, they will be ignored.
    pub mz_start: f64,
    /// The highest m/z in the spectrum. If an input spectrum has higher m/z values, they will be ignored.
    pub mz_end: f64,
    /// The spacing between m/z values in `mz_grid`. This value should be chosen relative to the sharpness
    /// of the peak shape of the mass analyzer used, but the smaller it is, the more computationally intensive
    /// the averaging process is, and the more memory it consumes.
    pub dx: f64,
    /// The current set of spectra to be averaged together. This uses a deque because the usecase of
    /// pushing spectra into an averaging window while removing them from the other side fits with the
    /// way one might average spectra over time.
    pub array_pairs: VecDeque<ArrayPair<'lifespan>>,
}

impl<'a, 'b: 'a> SignalAverager<'a> {
    pub fn new(mz_start: f64, mz_end: f64, dx: f64) -> SignalAverager<'a> {
        SignalAverager {
            mz_grid: gridspace(mz_start, mz_end, dx),
            mz_start,
            mz_end,
            dx,
            array_pairs: VecDeque::new(),
        }
    }

    /// Put `pair` into the queue of arrays being averaged together.
    pub fn push(&mut self, pair: ArrayPair<'b>) {
        self.array_pairs.push_back(pair)
    }

    /// Remove the least recently added array pair from the queue.
    pub fn pop(&mut self) -> Option<ArrayPair<'a>> {
        self.array_pairs.pop_front()
    }

    pub fn len(&self) -> usize {
        self.array_pairs.len()
    }

    pub fn is_empty(&self) -> bool {
        self.array_pairs.is_empty()
    }

    /// Linear interpolation between two control points to find the intensity
    /// at a third point between them.
    ///
    /// # Arguments
    /// - `mz_j` - The first control point's m/z
    /// - `mz_x` - The interpolated m/z
    /// - `mz_j1` - The second control point's m/z
    /// - `inten_j` - The first control point's intensity
    /// - `inten_j1` - The second control point's intensity
    #[inline]
    pub fn interpolate_point(
        &self,
        mz_j: f64,
        mz_x: f64,
        mz_j1: f64,
        inten_j: f64,
        inten_j1: f64,
    ) -> f64 {
        ((inten_j * (mz_j1 - mz_x)) + (inten_j1 * (mz_x - mz_j))) / (mz_j1 - mz_j)
    }

    /// A linear interpolation across all spectra between `start_mz` and `end_mz`, with
    /// their intensities written into `out`.
    pub fn interpolate_into(&self, out: &mut [f32], start_mz: f64, end_mz: f64) -> usize {
        let offset = self.find_offset(start_mz);
        let stop_index = self.find_offset(end_mz);
        // debug_assert!(stop_index - offset == out.len());

        for block in self.array_pairs.iter() {
            for i in offset..stop_index {
                let x = self.mz_grid[i];
                let j = block.find(x);
                let mz_j = block.mz_array[j];

                let (mz_j, inten_j, mz_j1, inten_j1) = if (mz_j <= x) && ((j + 1) < block.len()) {
                    (
                        mz_j,
                        block.intensity_array[j],
                        block.mz_array[j + 1],
                        block.intensity_array[j + 1],
                    )
                } else if mz_j > x && j > 0 {
                    (
                        block.mz_array[j - 1],
                        block.intensity_array[j - 1],
                        block.mz_array[j],
                        block.intensity_array[j],
                    )
                } else {
                    continue;
                };
                let interp =
                    self.interpolate_point(mz_j, x, mz_j1, inten_j as f64, inten_j1 as f64);
                out[i - offset] += interp as f32;
            }
        }
        if self.array_pairs.len() > 1 {
            let normalizer = self.array_pairs.len() as f32;
            out.iter_mut().for_each(|y| *y /= normalizer);
        }
        stop_index - offset
    }

    pub fn interpolate_chunks(&self, n_chunks: usize) -> Vec<f32> {
        let mut result = self.create_intensity_array();
        if self.array_pairs.is_empty() {
            return result;
        }
        let n_points = self.points_between(self.mz_start, self.mz_end);

        let points_per_chunk = n_points / n_chunks;
        for i in 0..n_chunks {
            let offset = i * points_per_chunk;
            let (size, start_mz, end_mz) = if i == n_chunks - 1 {
                (n_points - offset, self.mz_grid[offset], self.mz_end)
            } else {
                (
                    points_per_chunk,
                    self.mz_grid[offset],
                    self.mz_grid[offset + points_per_chunk],
                )
            };
            let mut sub = self.create_intensity_array_of_size(size);
            self.interpolate_into(&mut sub, start_mz, end_mz);
            (result[offset..offset + size]).copy_from_slice(&sub);
        }
        result
    }

    #[cfg(feature = "parallelism")]
    #[allow(unused)]
    pub(crate) fn interpolate_chunks_parallel_locked(&'a self, n_chunks: usize) -> Vec<f32> {
        let result = self.create_intensity_array();
        if self.array_pairs.is_empty() {
            return result;
        }
        let n_points = self.points_between(self.mz_start, self.mz_end);
        let locked_result = Mutex::new(result);
        let points_per_chunk = n_points / n_chunks;
        (0..n_chunks).into_par_iter().for_each(|i| {
            let offset = i * points_per_chunk;
            let (size, start_mz, end_mz) = if i == n_chunks - 1 {
                (n_points - offset, self.mz_grid[offset], self.mz_end)
            } else {
                (
                    points_per_chunk,
                    self.mz_grid[offset],
                    self.mz_grid[offset + points_per_chunk],
                )
            };
            let mut sub = self.create_intensity_array_of_size(size);
            self.interpolate_into(&mut sub, start_mz, end_mz);

            let mut out = locked_result.lock().unwrap();
            (out[offset..offset + size]).copy_from_slice(&sub);
        });
        locked_result.into_inner().unwrap()
    }

    #[cfg(feature = "parallelism")]
    #[allow(unused)]
    pub(crate) fn interpolate_chunks_parallel(&'a self, n_chunks: usize) -> Vec<f32> {
        let mut result = self.create_intensity_array();
        if self.array_pairs.is_empty() {
            return result;
        }
        let n_points = self.points_between(self.mz_start, self.mz_end);
        let points_per_chunk = n_points / n_chunks;
        let mz_chunks: Vec<&[f64]> = self.mz_grid.chunks(points_per_chunk).collect();
        let mut intensity_chunks: Vec<&mut [f32]> = result.chunks_mut(points_per_chunk).collect();

        intensity_chunks[..]
            .par_iter_mut()
            .zip(mz_chunks[..].par_iter())
            .for_each(|(mut intensity_chunk, mz_chunk)| {
                let start_mz = mz_chunk.first().unwrap();
                // The + 1e-6 is just a gentle push to get interpolate_into to roll over to the last position in the chunk
                let end_mz = mz_chunk.last().unwrap() + 1e-6;
                self.interpolate_into(intensity_chunk, *start_mz, end_mz);
            });
        result
    }

    /// Allocate a new intensity array, [`interpolate_into`](SignalAverager::interpolate_into) it, and return it.
    pub fn interpolate(&'a self) -> Vec<f32> {
        let mut result = self.create_intensity_array();
        self.interpolate_into(&mut result, self.mz_start, self.mz_end);
        result
    }
}

impl<'lifespan> MZGrid for SignalAverager<'lifespan> {
    fn mz_grid(&self) -> &[f64] {
        &self.mz_grid
    }
}

impl<'lifespan> Extend<ArrayPair<'lifespan>> for SignalAverager<'lifespan> {
    fn extend<T: IntoIterator<Item = ArrayPair<'lifespan>>>(&mut self, iter: T) {
        self.array_pairs.extend(iter)
    }
}

// Can't inline cfg-if
cfg_if::cfg_if! {
    if #[cfg(feature = "parallelism")] {
        fn average_signal_inner(averager: &SignalAverager, n: usize) -> Vec<f32> {
            averager.interpolate_chunks_parallel(3 + n)
        }
    } else {
        fn average_signal_inner(averager: &SignalAverager, n: usize) -> Vec<f32> {
            averager.interpolate()
        }
    }
}

/// Average together signal from the slice of `ArrayPair`s with spacing `dx` and create
/// a new `ArrayPair` from it
pub fn average_signal<'lifespan, 'owned: 'lifespan>(signal: &[ArrayPair<'lifespan>], dx: f64) -> ArrayPair<'owned> {
    let (mz_min, mz_max) = signal.iter().fold((f64::infinity(), 0.0), |acc, x| {
        (
            if acc.0 < x.min_mz { acc.0 } else { x.min_mz },
            if acc.1 > x.max_mz { acc.1 } else { x.max_mz },
        )
    });
    let mut averager = SignalAverager::new(mz_min, mz_max, dx);
    averager
        .array_pairs
        .extend(signal.iter().map(|a| a.borrow()));
    let signal = average_signal_inner(&averager, signal.len());
    ArrayPair::new(Cow::Owned(averager.copy_mz_array()), Cow::Owned(signal))
}

pub fn rebin<'transient, 'lifespan: 'transient>(
    mz_array: &'lifespan [f64],
    intensity_array: &'lifespan [f32],
    dx: f64,
) -> ArrayPair<'transient> {
    let pair = [ArrayPair::from((mz_array, intensity_array))];
    average_signal(&pair, dx)
}

#[cfg(test)]
mod test {
    use super::*;
    use crate::peak_picker::PeakPicker;
    use crate::test_data::{X, Y};

    #[test]
    fn test_rebin_one() {
        let mut averager = SignalAverager::new(X[0], X[X.len() - 1], 0.00001);
        averager.push(ArrayPair::wrap(&X, &Y));
        let yhat = averager.interpolate();
        let picker = PeakPicker::default();
        let mut acc = Vec::new();
        picker
            .discover_peaks(&averager.mz_grid, &yhat, &mut acc)
            .expect("Signal can be picked");
        let mzs = [180.0633881, 181.06338858024316, 182.06338874740308];
        for (peak, mz) in acc.iter().zip(mzs.iter()) {
            assert!((peak.mz - mz).abs() < 1e-6);
            assert!(peak.intensity > 0.0);
        }
    }

    #[test]
    fn test_rebin_chunked() {
        let mut averager = SignalAverager::new(X[0], X[X.len() - 1], 0.00001);
        averager.push(ArrayPair::wrap(&X, &Y));
        let yhat = averager.interpolate_chunks(3);
        let picker = PeakPicker::default();
        let mut acc = Vec::new();
        picker
            .discover_peaks(&averager.mz_grid, &yhat, &mut acc)
            .expect("Signal can be picked");
        let mzs = [180.0633881, 181.06338858024316, 182.06338874740308];
        for (peak, mz) in acc.iter().zip(mzs.iter()) {
            assert!((peak.mz - mz).abs() < 1e-6);
        }
    }

    #[test]
    #[cfg(feature = "parallelism")]
    fn test_rebin_parallel_locked() {
        let mut averager = SignalAverager::new(X[0], X[X.len() - 1], 0.00001);
        averager.push(ArrayPair::wrap(&X, &Y));
        let yhat = averager.interpolate_chunks_parallel_locked(6);
        let picker = PeakPicker::default();
        let mut acc = Vec::new();
        picker
            .discover_peaks(&averager.mz_grid, &yhat, &mut acc)
            .expect("Signal can be picked");
        let mzs = [180.0633881, 181.06338858024316, 182.06338874740308];
        for (peak, mz) in acc.iter().zip(mzs.iter()) {
            assert!((peak.mz - mz).abs() < 1e-6);
        }
    }

    #[test]
    #[cfg(feature = "parallelism")]
    fn test_rebin_parallel() {
        let mut averager = SignalAverager::new(X[0], X[X.len() - 1], 0.00001);
        averager.push(ArrayPair::wrap(&X, &Y));
        let yhat = averager.interpolate_chunks_parallel(6);
        let picker = PeakPicker::default();
        let mut acc = Vec::new();
        picker
            .discover_peaks(&averager.mz_grid, &yhat, &mut acc)
            .expect("Signal can be picked");
        let mzs = [180.0633881, 181.06338858024316, 182.06338874740308];
        for (peak, mz) in acc.iter().zip(mzs.iter()) {
            assert!((peak.mz - mz).abs() < 1e-6);
        }
    }
}