1use std::collections::HashMap;
32
33use crate::source::SpectrumSource;
34use crate::types::SpectrumRecord;
35
36const FLOAT_REL_TOL: f64 = 1e-4;
38
39#[derive(Debug)]
41pub enum ConformanceError {
42 PeakArrayLengthMismatch {
44 native_id: String,
45 mz_len: usize,
46 intensity_len: usize,
47 },
48 MobilityArrayLengthMismatch {
50 native_id: String,
51 mz_len: usize,
52 mobility_len: usize,
53 },
54 TicMismatch {
56 native_id: String,
57 declared: f64,
58 computed: f64,
59 },
60 BasePeakIntensityMismatch {
62 native_id: String,
63 declared: f64,
64 computed: f64,
65 },
66 MissingPrecursor { native_id: String, ms_level: u32 },
68 RetentionTimeNonMonotonic {
70 prefix: String,
71 previous: f64,
72 current: f64,
73 native_id: String,
74 },
75 IndexSequence {
77 native_id: String,
78 previous: Option<usize>,
79 current: usize,
80 },
81 EmptySpectrum { native_id: String },
83}
84
85impl std::fmt::Display for ConformanceError {
86 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
87 match self {
88 Self::PeakArrayLengthMismatch {
89 native_id,
90 mz_len,
91 intensity_len,
92 } => write!(
93 f,
94 "{native_id}: mz.len()={mz_len} != intensity.len()={intensity_len}"
95 ),
96 Self::MobilityArrayLengthMismatch {
97 native_id,
98 mz_len,
99 mobility_len,
100 } => write!(
101 f,
102 "{native_id}: inv_mobility_per_peak.len()={mobility_len} != mz.len()={mz_len}"
103 ),
104 Self::TicMismatch {
105 native_id,
106 declared,
107 computed,
108 } => {
109 write!(
110 f,
111 "{native_id}: declared TIC {declared} != computed {computed}"
112 )
113 }
114 Self::BasePeakIntensityMismatch {
115 native_id,
116 declared,
117 computed,
118 } => write!(
119 f,
120 "{native_id}: declared base-peak intensity {declared} != max(intensity) {computed}"
121 ),
122 Self::MissingPrecursor {
123 native_id,
124 ms_level,
125 } => write!(f, "{native_id}: ms_level={ms_level} but no precursor info"),
126 Self::RetentionTimeNonMonotonic {
127 prefix,
128 previous,
129 current,
130 native_id,
131 } => write!(
132 f,
133 "{native_id}: RT regressed in stream '{prefix}' ({previous} -> {current})"
134 ),
135 Self::IndexSequence {
136 native_id,
137 previous,
138 current,
139 } => write!(
140 f,
141 "{native_id}: index sequence broken (prev={previous:?}, current={current})"
142 ),
143 Self::EmptySpectrum { native_id } => write!(f, "{native_id}: empty spectrum"),
144 }
145 }
146}
147
148impl std::error::Error for ConformanceError {}
149
150fn rel_close(a: f64, b: f64, tol: f64) -> bool {
151 let scale = a.abs().max(b.abs()).max(1.0);
152 (a - b).abs() <= tol * scale
153}
154
155fn rt_stream_key(native_id: &str) -> String {
162 native_id
163 .split_whitespace()
164 .filter(|tok| !tok.starts_with("scan="))
165 .collect::<Vec<_>>()
166 .join(" ")
167}
168
169pub fn assert_source_invariants<S: SpectrumSource>(src: &mut S) -> Result<usize, ConformanceError> {
175 assert_iter_invariants(src.iter_spectra())
176}
177
178pub fn assert_iter_invariants<I: IntoIterator<Item = SpectrumRecord>>(
182 iter: I,
183) -> Result<usize, ConformanceError> {
184 let mut last_index: Option<usize> = None;
185 let mut last_rt: HashMap<String, f64> = HashMap::new();
186 let mut count = 0usize;
187 for spectrum in iter {
188 check_one(&spectrum, last_index, &mut last_rt)?;
189 last_index = Some(spectrum.index);
190 count += 1;
191 }
192 Ok(count)
193}
194
195fn check_one(
196 s: &SpectrumRecord,
197 last_index: Option<usize>,
198 last_rt: &mut HashMap<String, f64>,
199) -> Result<(), ConformanceError> {
200 if s.mz.len() != s.intensity.len() {
201 return Err(ConformanceError::PeakArrayLengthMismatch {
202 native_id: s.native_id.clone(),
203 mz_len: s.mz.len(),
204 intensity_len: s.intensity.len(),
205 });
206 }
207 if let Some(mob) = &s.inv_mobility_per_peak {
208 if mob.len() != s.mz.len() {
209 return Err(ConformanceError::MobilityArrayLengthMismatch {
210 native_id: s.native_id.clone(),
211 mz_len: s.mz.len(),
212 mobility_len: mob.len(),
213 });
214 }
215 }
216 if let Some(tic) = s.total_ion_current {
217 let computed: f64 = s.intensity.iter().map(|&v| v as f64).sum();
218 if !rel_close(tic, computed, FLOAT_REL_TOL) {
219 return Err(ConformanceError::TicMismatch {
220 native_id: s.native_id.clone(),
221 declared: tic,
222 computed,
223 });
224 }
225 }
226 if let Some(bp) = s.base_peak_intensity {
227 if s.intensity.is_empty() {
228 let _ = bp;
230 } else {
231 let computed = s
232 .intensity
233 .iter()
234 .copied()
235 .fold(f32::NEG_INFINITY, f32::max) as f64;
236 if !rel_close(bp, computed, FLOAT_REL_TOL) {
237 return Err(ConformanceError::BasePeakIntensityMismatch {
238 native_id: s.native_id.clone(),
239 declared: bp,
240 computed,
241 });
242 }
243 }
244 }
245 if s.ms_level >= 2 {
246 let has_precursor = match &s.precursor {
247 Some(p) => {
248 p.target_mz.is_some() || p.selected_mz.is_some() || p.precursor_native_id.is_some()
249 }
250 None => false,
251 };
252 if !has_precursor {
253 return Err(ConformanceError::MissingPrecursor {
254 native_id: s.native_id.clone(),
255 ms_level: s.ms_level,
256 });
257 }
258 }
259 match last_index {
260 None => {
261 if s.index != 0 {
262 return Err(ConformanceError::IndexSequence {
263 native_id: s.native_id.clone(),
264 previous: None,
265 current: s.index,
266 });
267 }
268 }
269 Some(prev) => {
270 if s.index <= prev {
271 return Err(ConformanceError::IndexSequence {
272 native_id: s.native_id.clone(),
273 previous: Some(prev),
274 current: s.index,
275 });
276 }
277 }
278 }
279 let key = rt_stream_key(&s.native_id);
280 if let Some(prev) = last_rt.get(&key).copied() {
281 if s.retention_time_sec + 1e-6 < prev {
283 return Err(ConformanceError::RetentionTimeNonMonotonic {
284 prefix: key,
285 previous: prev,
286 current: s.retention_time_sec,
287 native_id: s.native_id.clone(),
288 });
289 }
290 }
291 last_rt.insert(key, s.retention_time_sec);
292 Ok(())
293}
294
295#[cfg(test)]
296mod tests {
297 use super::*;
298 use crate::types::RunMetadata;
299 use crate::{Polarity, PrecursorInfo, SpectrumRecord};
300
301 struct ToySource(Vec<SpectrumRecord>);
302
303 impl SpectrumSource for ToySource {
304 fn run_metadata(&self) -> RunMetadata {
305 RunMetadata {
306 source_file_name: "toy".into(),
307 source_file_format: crate::CvTerm {
308 accession: "MS:1000563",
309 name: "Thermo RAW format".into(),
310 },
311 native_id_format: crate::CvTerm {
312 accession: "MS:1000768",
313 name: "Thermo nativeID format".into(),
314 },
315 instrument: crate::CvTerm {
316 accession: "MS:1000483",
317 name: "Thermo Fisher Scientific instrument model".into(),
318 },
319 software_name: "test".into(),
320 software_version: "0.0".into(),
321 start_timestamp: None,
322 mobility_array_kind: None,
323 }
324 }
325 fn iter_spectra<'a>(&'a mut self) -> Box<dyn Iterator<Item = SpectrumRecord> + 'a> {
326 Box::new(self.0.clone().into_iter())
327 }
328 }
329
330 fn ok_spec(index: usize, ms_level: u32, rt: f64) -> SpectrumRecord {
331 let mz = vec![100.0, 200.0];
332 let intensity = vec![10.0f32, 20.0];
333 SpectrumRecord {
334 index,
335 scan_number: (index + 1) as u32,
336 native_id: format!("controllerType=0 controllerNumber=1 scan={}", index + 1),
337 ms_level,
338 polarity: Some(Polarity::Positive),
339 scan_mode: None,
340 analyzer: None,
341 filter: None,
342 retention_time_sec: rt,
343 total_ion_current: Some(30.0),
344 base_peak_mz: Some(200.0),
345 base_peak_intensity: Some(20.0),
346 low_mz: Some(100.0),
347 high_mz: Some(200.0),
348 ion_injection_time_ms: None,
349 inv_mobility: None,
350 precursor: if ms_level >= 2 {
351 Some(PrecursorInfo {
352 selected_mz: Some(500.0),
353 ..Default::default()
354 })
355 } else {
356 None
357 },
358 mz,
359 intensity,
360 inv_mobility_per_peak: None,
361 }
362 }
363
364 #[test]
365 fn happy_path_passes() {
366 let mut src = ToySource(vec![
367 ok_spec(0, 1, 1.0),
368 ok_spec(1, 2, 2.0),
369 ok_spec(2, 1, 3.0),
370 ]);
371 assert_eq!(assert_source_invariants(&mut src).unwrap(), 3);
372 }
373
374 #[test]
375 fn detects_tic_mismatch() {
376 let mut s = ok_spec(0, 1, 1.0);
377 s.total_ion_current = Some(999.0);
378 let mut src = ToySource(vec![s]);
379 let err = assert_source_invariants(&mut src).unwrap_err();
380 assert!(matches!(err, ConformanceError::TicMismatch { .. }));
381 }
382
383 #[test]
384 fn detects_missing_precursor() {
385 let mut s = ok_spec(0, 2, 1.0);
386 s.precursor = None;
387 let mut src = ToySource(vec![s]);
388 let err = assert_source_invariants(&mut src).unwrap_err();
389 assert!(matches!(err, ConformanceError::MissingPrecursor { .. }));
390 }
391
392 #[test]
393 fn detects_rt_regression_within_same_stream() {
394 let s0 = ok_spec(0, 1, 5.0);
395 let s1 = ok_spec(1, 1, 2.0);
396 let mut src = ToySource(vec![s0, s1]);
397 let err = assert_source_invariants(&mut src).unwrap_err();
398 assert!(matches!(
399 err,
400 ConformanceError::RetentionTimeNonMonotonic { .. }
401 ));
402 }
403
404 #[test]
405 fn detects_mobility_length_mismatch() {
406 let mut s = ok_spec(0, 1, 1.0);
407 s.inv_mobility_per_peak = Some(vec![0.5]);
408 let mut src = ToySource(vec![s]);
409 let err = assert_source_invariants(&mut src).unwrap_err();
410 assert!(matches!(
411 err,
412 ConformanceError::MobilityArrayLengthMismatch { .. }
413 ));
414 }
415}