openproteo_core/source.rs
1//! The `SpectrumSource` trait: every vendor parser implements this.
2
3use crate::types::{ChromatogramRecord, RunMetadata, SpectrumRecord};
4
5/// A source of decoded mass spectra.
6///
7/// Vendors implement this on whatever value carries their open file state
8/// (e.g. `RawFileReader` + a `&mut Read+Seek` source for opentfraw, a
9/// `Reader` for opentimstdf).
10///
11/// The trait deliberately uses boxed iterators rather than RPITIT so that
12/// implementations can pick a different underlying iterator type per call
13/// without leaking that into the trait signature, and so consumers can hold
14/// a `&mut dyn SpectrumSource` for downstream plumbing (mzML writer, ingest
15/// pipelines, language bindings).
16pub trait SpectrumSource {
17 /// Run-level metadata. Cheap to call; vendors typically build this once.
18 fn run_metadata(&self) -> RunMetadata;
19
20 /// Iterate every spectrum the file contains. Spectra the parser cannot
21 /// decode should be skipped silently; the writer trusts whatever the
22 /// iterator yields.
23 ///
24 /// The iterator borrows `self` mutably so vendors can stream from disk
25 /// without buffering the whole run in memory.
26 fn iter_spectra<'a>(&'a mut self) -> Box<dyn Iterator<Item = SpectrumRecord> + 'a>;
27
28 /// Iterate chromatogram traces (TIC, BPC, SRM). Defaults to an empty
29 /// iterator; most parsers do not synthesize chromatograms.
30 fn iter_chromatograms<'a>(&'a mut self) -> Box<dyn Iterator<Item = ChromatogramRecord> + 'a> {
31 Box::new(std::iter::empty())
32 }
33
34 /// Total number of spectra the source will yield, when known cheaply.
35 /// Used by the mzML writer to populate `<spectrumList count="...">`. If
36 /// `None`, the writer falls back to buffering spectrum offsets and
37 /// patching the count at the end.
38 fn spectrum_count_hint(&self) -> Option<usize> {
39 None
40 }
41}