use std::cmp::Ordering;
use std::collections::HashMap;
use std::hash::Hash;
use rayon::prelude::*;
use serde::{Deserialize, Serialize};
use crate::cluster::candidates::CandidateOpts;
use crate::cluster::cluster::ClusterResult1D;
use crate::cluster::feature::SimpleFeature;
use crate::cluster::scoring::{assign_ms2_to_best_ms1_by_xic, enumerate_ms2_ms1_pairs_simple, XicScoreOpts};
use crate::data::dia::TimsDatasetDIA;
pub fn cluster_mz_mu(c: &ClusterResult1D) -> Option<f32> {
if let Some(ref f) = c.mz_fit {
if f.mu.is_finite() && f.mu > 0.0 {
return Some(f.mu);
}
}
if let Some((lo, hi)) = c.mz_window {
let mu = 0.5 * (lo + hi);
if mu.is_finite() && mu > 0.0 {
return Some(mu);
}
}
None
}
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct PseudoFragment {
pub mz: f32,
pub intensity: f32,
pub ms2_cluster_index: usize,
pub ms2_cluster_id: u64,
pub window_group: u32,
}
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct PseudoSpectrum {
pub precursor_mz: f32,
pub precursor_charge: u8,
pub rt_apex: f32,
pub im_apex: f32,
pub window_groups: Vec<u32>,
pub feature_id: Option<usize>,
pub precursor_cluster_indices: Vec<usize>,
pub precursor_cluster_ids: Vec<u64>,
pub fragments: Vec<PseudoFragment>,
}
#[derive(Clone, Debug)]
pub struct PseudoSpecOpts {
pub top_n_fragments: usize,
}
impl Default for PseudoSpecOpts {
fn default() -> Self {
Self {
top_n_fragments: 500, }
}
}
#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)]
pub enum PrecursorKey {
Feature {
feature_id: usize,
window_group: u32,
},
OrphanCluster {
cluster_idx: usize,
window_group: u32,
},
}
fn build_cluster_to_feature_map(n_ms1: usize, features: &[SimpleFeature]) -> Vec<Option<usize>> {
let mut map = vec![None; n_ms1];
for (f_idx, feat) in features.iter().enumerate() {
for &ci in &feat.member_cluster_indices {
if ci < n_ms1 {
map[ci] = Some(f_idx);
}
}
}
map
}
fn precursor_apex_from_feature_or_cluster(
key: PrecursorKey,
ms1: &[ClusterResult1D],
features: &[SimpleFeature],
) -> (f32, f32) {
match key {
PrecursorKey::OrphanCluster { cluster_idx, .. } => {
let c = &ms1[cluster_idx];
(c.rt_fit.mu, c.im_fit.mu)
}
PrecursorKey::Feature { feature_id, .. } => {
let feat = &features[feature_id];
let mut rt_w = 0.0_f64;
let mut im_w = 0.0_f64;
let mut wsum = 0.0_f64;
for &ci in &feat.member_cluster_indices {
if ci >= ms1.len() {
continue;
}
let c = &ms1[ci];
let w = c.raw_sum.max(1.0) as f64;
if c.rt_fit.mu.is_finite() {
rt_w += (c.rt_fit.mu as f64) * w;
}
if c.im_fit.mu.is_finite() {
im_w += (c.im_fit.mu as f64) * w;
}
wsum += w;
}
if wsum > 0.0 {
((rt_w / wsum) as f32, (im_w / wsum) as f32)
} else {
let (rt_lo, rt_hi) = feat.rt_bounds;
let (im_lo, im_hi) = feat.im_bounds;
(
0.5 * ((rt_lo + rt_hi) as f32),
0.5 * ((im_lo + im_hi) as f32),
)
}
}
}
}
pub fn build_pseudo_spectra_from_pairs(
ms1: &[ClusterResult1D],
ms2: &[ClusterResult1D],
features: &[SimpleFeature],
pairs: &[(usize, usize)],
opts: &PseudoSpecOpts,
) -> Vec<PseudoSpectrum> {
if ms1.is_empty() || ms2.is_empty() || pairs.is_empty() {
return Vec::new();
}
let cluster_to_feature = build_cluster_to_feature_map(ms1.len(), features);
let mut grouped: HashMap<PrecursorKey, Vec<usize>> = HashMap::new();
for &(ms2_idx, ms1_idx) in pairs {
if ms1_idx >= ms1.len() || ms2_idx >= ms2.len() {
continue;
}
let g = ms2[ms2_idx]
.window_group
.unwrap_or(0);
let key = match cluster_to_feature[ms1_idx] {
Some(fid) => PrecursorKey::Feature {
feature_id: fid,
window_group: g,
},
None => PrecursorKey::OrphanCluster {
cluster_idx: ms1_idx,
window_group: g,
},
};
grouped.entry(key).or_default().push(ms2_idx);
}
if grouped.is_empty() {
return Vec::new();
}
let top_n = opts.top_n_fragments;
let mut spectra: Vec<PseudoSpectrum> = grouped
.into_par_iter()
.map(|(key, mut frag_indices)| {
frag_indices.sort_unstable();
frag_indices.dedup();
let (prec_mz, charge, prec_cluster_indices, prec_cluster_ids, _wg) = match key {
PrecursorKey::OrphanCluster { cluster_idx, window_group } => {
let c = &ms1[cluster_idx];
let mz = cluster_mz_mu(c).unwrap_or(0.0);
let z = 0u8;
let idxs = vec![cluster_idx];
let ids = vec![c.cluster_id];
(mz, z, idxs, ids, window_group)
}
PrecursorKey::Feature { feature_id, window_group } => {
let feat = &features[feature_id];
let mz = feat.mz_mono;
let z = feat.charge;
let idxs = feat.member_cluster_indices.clone();
let ids = feat.member_cluster_ids.clone();
(mz, z, idxs, ids, window_group)
}
};
let (rt_apex, im_apex) =
precursor_apex_from_feature_or_cluster(key, ms1, features);
let mut frags: Vec<PseudoFragment> = frag_indices
.into_iter()
.filter_map(|j| {
let c2 = &ms2[j];
let mz = match cluster_mz_mu(c2) {
Some(m) if m.is_finite() && m > 0.0 => m,
_ => return None,
};
let intensity = c2.raw_sum.max(0.0);
Some(PseudoFragment {
mz,
intensity,
ms2_cluster_index: j,
ms2_cluster_id: c2.cluster_id,
window_group: c2.window_group.unwrap_or(0),
})
})
.collect();
frags.sort_unstable_by(|a, b| {
b.intensity
.partial_cmp(&a.intensity)
.unwrap_or(Ordering::Equal)
});
if top_n > 0 && frags.len() > top_n {
frags.truncate(top_n);
}
PseudoSpectrum {
precursor_mz: prec_mz,
precursor_charge: charge,
rt_apex,
im_apex,
window_groups: {
let mut wgs: Vec<u32> = frags.iter().map(|f| f.window_group).collect();
wgs.sort_unstable();
wgs.dedup();
wgs
},
feature_id: match key {
PrecursorKey::Feature { feature_id, .. } => Some(feature_id),
PrecursorKey::OrphanCluster { .. } => None,
},
precursor_cluster_indices: prec_cluster_indices,
precursor_cluster_ids: prec_cluster_ids,
fragments: frags,
}
})
.collect();
spectra.sort_unstable_by(|a, b| {
a.rt_apex
.partial_cmp(&b.rt_apex)
.unwrap_or(Ordering::Equal)
.then_with(|| {
a.precursor_mz
.partial_cmp(&b.precursor_mz)
.unwrap_or(Ordering::Equal)
})
});
spectra
}
pub fn build_pseudo_spectra_with_xic(
ds: &TimsDatasetDIA,
ms1: &[ClusterResult1D],
ms2: &[ClusterResult1D],
features: &[SimpleFeature],
cand_opts: &CandidateOpts,
xic_opts: &XicScoreOpts,
pseudo_opts: &PseudoSpecOpts,
) -> (Vec<PseudoSpectrum>, Vec<(usize, usize, f32)>) {
let pairs_geom: Vec<(usize, usize)> =
enumerate_ms2_ms1_pairs_simple(ds, ms1, ms2, cand_opts);
if pairs_geom.is_empty() {
return (Vec::new(), Vec::new());
}
let pairs_scored: Vec<(usize, usize, f32)> =
assign_ms2_to_best_ms1_by_xic(ms1, ms2, &pairs_geom, xic_opts);
if pairs_scored.is_empty() {
return (Vec::new(), Vec::new());
}
let final_pairs: Vec<(usize, usize)> = pairs_scored
.iter()
.map(|(ms2_idx, ms1_idx, _s)| (*ms2_idx, *ms1_idx))
.collect();
let spectra = build_pseudo_spectra_from_pairs(
ms1,
ms2,
features,
&final_pairs,
pseudo_opts,
);
(spectra, pairs_scored)
}