use std::collections::BTreeMap;
use rayon::prelude::*;
use rayon::ThreadPoolBuilder;
use serde::{Deserialize, Serialize};
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct SpectrumProcessingConfig {
pub take_top_n: usize,
pub deisotope: bool,
pub deisotope_tolerance_ppm: f64,
pub deisotope_min_mz_diff: f64,
}
impl Default for SpectrumProcessingConfig {
fn default() -> Self {
SpectrumProcessingConfig {
take_top_n: 150,
deisotope: true,
deisotope_tolerance_ppm: 10.0,
deisotope_min_mz_diff: 0.0005,
}
}
}
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct PreprocessedSpectrum {
pub spec_id: String,
pub precursor_mz: f64,
pub precursor_charge: Option<i32>,
pub precursor_intensity: f64,
pub inverse_ion_mobility: f64,
pub collision_energy: f64,
pub scan_start_time: f64,
pub total_ion_current: f64,
pub mz: Vec<f32>,
pub intensity: Vec<f32>,
pub isolation_mz: f64,
pub isolation_width: f64,
}
impl PreprocessedSpectrum {
pub fn new(
spec_id: String,
precursor_mz: f64,
precursor_charge: Option<i32>,
precursor_intensity: f64,
inverse_ion_mobility: f64,
collision_energy: f64,
scan_start_time: f64,
total_ion_current: f64,
mz: Vec<f32>,
intensity: Vec<f32>,
isolation_mz: f64,
isolation_width: f64,
) -> Self {
PreprocessedSpectrum {
spec_id,
precursor_mz,
precursor_charge,
precursor_intensity,
inverse_ion_mobility,
collision_energy,
scan_start_time,
total_ion_current,
mz,
intensity,
isolation_mz,
isolation_width,
}
}
}
const ISOTOPE_MASS_DIFF: f64 = 1.00335;
const PROTON_MASS: f64 = 1.007276466812;
pub fn deisotope_spectrum(
mz: &[f64],
intensity: &[f64],
tolerance_ppm: f64,
) -> (Vec<f64>, Vec<f64>) {
if mz.is_empty() {
return (Vec::new(), Vec::new());
}
let n = mz.len();
let mut is_isotope = vec![false; n];
for i in 0..n {
if is_isotope[i] {
continue;
}
let current_mz = mz[i];
let current_intensity = intensity[i];
for charge in 1..=4 {
let isotope_spacing = ISOTOPE_MASS_DIFF / charge as f64;
for isotope_num in 1..=3 {
let expected_mz = current_mz + isotope_spacing * isotope_num as f64;
let tolerance = expected_mz * tolerance_ppm / 1_000_000.0;
for j in (i + 1)..n {
if mz[j] < expected_mz - tolerance {
continue;
}
if mz[j] > expected_mz + tolerance {
break;
}
if intensity[j] < current_intensity {
is_isotope[j] = true;
}
}
}
}
}
let mut filtered_mz = Vec::with_capacity(n);
let mut filtered_intensity = Vec::with_capacity(n);
for i in 0..n {
if !is_isotope[i] {
filtered_mz.push(mz[i]);
filtered_intensity.push(intensity[i]);
}
}
(filtered_mz, filtered_intensity)
}
pub fn filter_top_n(
mz: &[f64],
intensity: &[f64],
top_n: usize,
) -> (Vec<f64>, Vec<f64>) {
if mz.len() <= top_n {
return (mz.to_vec(), intensity.to_vec());
}
let mut indices: Vec<usize> = (0..mz.len()).collect();
indices.sort_by(|&a, &b| {
intensity[b].partial_cmp(&intensity[a]).unwrap_or(std::cmp::Ordering::Equal)
});
indices.truncate(top_n);
indices.sort_by(|&a, &b| {
mz[a].partial_cmp(&mz[b]).unwrap_or(std::cmp::Ordering::Equal)
});
let filtered_mz: Vec<f64> = indices.iter().map(|&i| mz[i]).collect();
let filtered_intensity: Vec<f64> = indices.iter().map(|&i| intensity[i]).collect();
(filtered_mz, filtered_intensity)
}
pub fn normalize_intensity(intensity: &[f64], target_sum: f64) -> Vec<f64> {
let sum: f64 = intensity.iter().sum();
if sum == 0.0 {
return intensity.to_vec();
}
intensity.iter().map(|&i| i * target_sum / sum).collect()
}
pub fn flatten_frame_to_spectrum(
tof: &[i32],
mz: &[f64],
intensity: &[f64],
) -> (Vec<f64>, Vec<f64>) {
let mut grouped_data: BTreeMap<i32, Vec<(f64, f64)>> = BTreeMap::new();
for i in 0..tof.len() {
grouped_data
.entry(tof[i])
.or_insert_with(Vec::new)
.push((mz[i], intensity[i]));
}
let mut result_mz = Vec::with_capacity(grouped_data.len());
let mut result_intensity = Vec::with_capacity(grouped_data.len());
for (_, values) in grouped_data {
let sum_intensity: f64 = values.iter().map(|&(_, i)| i).sum();
let avg_mz: f64 = values.iter().map(|&(m, _)| m).sum::<f64>() / values.len() as f64;
result_mz.push(avg_mz);
result_intensity.push(sum_intensity);
}
(result_mz, result_intensity)
}
pub fn get_inverse_mobility_along_scan_marginal(
scan: &[i32],
mobility: &[f64],
intensity: &[f64],
) -> f64 {
let mut marginal_map: BTreeMap<i32, (f64, f64)> = BTreeMap::new();
for i in 0..scan.len() {
let entry = marginal_map.entry(scan[i]).or_insert((0.0, 0.0));
entry.0 += intensity[i]; entry.1 = mobility[i]; }
marginal_map
.iter()
.max_by(|a, b| a.1.0.partial_cmp(&b.1.0).unwrap_or(std::cmp::Ordering::Equal))
.map(|(_, (_, mob))| *mob)
.unwrap_or(0.0)
}
pub fn process_spectrum(
tof: &[i32],
mz: &[f64],
intensity: &[f64],
config: &SpectrumProcessingConfig,
) -> (Vec<f32>, Vec<f32>) {
let (flat_mz, flat_intensity) = flatten_frame_to_spectrum(tof, mz, intensity);
let (deiso_mz, deiso_intensity) = if config.deisotope {
deisotope_spectrum(&flat_mz, &flat_intensity, config.deisotope_tolerance_ppm)
} else {
(flat_mz, flat_intensity)
};
let (final_mz, final_intensity) = filter_top_n(&deiso_mz, &deiso_intensity, config.take_top_n);
let mass_f32: Vec<f32> = final_mz.iter().map(|&x| (x - PROTON_MASS) as f32).collect();
let intensity_f32: Vec<f32> = final_intensity.iter().map(|&x| x as f32).collect();
(mass_f32, intensity_f32)
}
#[derive(Clone, Debug)]
pub struct PASEFFragmentData {
pub frame_id: u32,
pub precursor_id: u32,
pub collision_energy: f64,
pub scan_start_time: f64, pub scan: Vec<i32>,
pub mobility: Vec<f64>,
pub tof: Vec<i32>,
pub mz: Vec<f64>,
pub intensity: Vec<f64>,
pub precursor_mz: f64,
pub precursor_charge: Option<i32>,
pub precursor_intensity: f64,
pub isolation_mz: f64,
pub isolation_width: f64,
}
pub fn process_pasef_fragments_batch(
fragments: Vec<PASEFFragmentData>,
dataset_name: &str,
config: &SpectrumProcessingConfig,
num_threads: usize,
) -> Vec<PreprocessedSpectrum> {
let pool = ThreadPoolBuilder::new()
.num_threads(num_threads)
.build()
.unwrap();
let ds_name = dataset_name.to_string();
pool.install(|| {
fragments
.par_iter()
.map(|frag| {
let inverse_mobility = get_inverse_mobility_along_scan_marginal(
&frag.scan,
&frag.mobility,
&frag.intensity,
);
let (processed_mz, processed_intensity) = process_spectrum(
&frag.tof,
&frag.mz,
&frag.intensity,
config,
);
let total_ion_current: f64 = frag.intensity.iter().sum();
let spec_id = format!("{}-{}-{}", frag.frame_id, frag.precursor_id, ds_name);
PreprocessedSpectrum::new(
spec_id,
frag.precursor_mz,
frag.precursor_charge,
frag.precursor_intensity,
inverse_mobility,
frag.collision_energy,
frag.scan_start_time,
total_ion_current,
processed_mz,
processed_intensity,
frag.isolation_mz,
frag.isolation_width,
)
})
.collect()
})
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_deisotope_spectrum() {
let mz = vec![500.0, 501.003, 502.006, 600.0];
let intensity = vec![1000.0, 500.0, 250.0, 800.0];
let (filtered_mz, filtered_intensity) = deisotope_spectrum(&mz, &intensity, 10.0);
assert_eq!(filtered_mz.len(), 2);
assert!((filtered_mz[0] - 500.0).abs() < 0.001);
assert!((filtered_mz[1] - 600.0).abs() < 0.001);
}
#[test]
fn test_filter_top_n() {
let mz = vec![100.0, 200.0, 300.0, 400.0, 500.0];
let intensity = vec![10.0, 50.0, 30.0, 100.0, 20.0];
let (filtered_mz, filtered_intensity) = filter_top_n(&mz, &intensity, 3);
assert_eq!(filtered_mz.len(), 3);
assert!((filtered_mz[0] - 200.0).abs() < 0.001);
assert!((filtered_mz[1] - 300.0).abs() < 0.001);
assert!((filtered_mz[2] - 400.0).abs() < 0.001);
}
#[test]
fn test_flatten_frame_to_spectrum() {
let tof = vec![1000, 1000, 2000];
let mz = vec![500.0, 500.1, 600.0];
let intensity = vec![100.0, 200.0, 300.0];
let (flat_mz, flat_intensity) = flatten_frame_to_spectrum(&tof, &mz, &intensity);
assert_eq!(flat_mz.len(), 2);
assert!((flat_mz[0] - 500.05).abs() < 0.001);
assert!((flat_intensity[0] - 300.0).abs() < 0.001);
assert!((flat_mz[1] - 600.0).abs() < 0.001);
assert!((flat_intensity[1] - 300.0).abs() < 0.001);
}
#[test]
fn test_get_inverse_mobility_along_scan_marginal() {
let scan = vec![100, 100, 101, 101];
let mobility = vec![1.0, 1.0, 1.1, 1.1];
let intensity = vec![50.0, 100.0, 200.0, 100.0];
let result = get_inverse_mobility_along_scan_marginal(&scan, &mobility, &intensity);
assert!((result - 1.1).abs() < 0.001);
}
#[test]
fn test_process_spectrum() {
let tof = vec![1000, 1001, 1002, 2000, 2001];
let mz = vec![500.0, 501.003, 502.006, 600.0, 601.003];
let intensity = vec![1000.0, 500.0, 250.0, 800.0, 400.0];
let config = SpectrumProcessingConfig {
take_top_n: 10,
deisotope: true,
deisotope_tolerance_ppm: 10.0,
deisotope_min_mz_diff: 0.0005,
};
let (processed_mz, processed_intensity) = process_spectrum(&tof, &mz, &intensity, &config);
assert!(processed_mz.len() <= mz.len());
let processed_sum: f32 = processed_intensity.iter().sum();
let original_sum: f64 = intensity.iter().sum();
assert!(processed_sum < original_sum as f32);
}
}