msn_kit/io/
mgf_parser.rs

1// (c) Copyright 2021 Trent Hauck
2// All Rights Reserved
3//! Module for reading and writing MGF files.
4use std::io::{Error, ErrorKind, Write};
5
6use crate::io::Format;
7use crate::spectrum::Spectrum;
8
/// Reader for MGF data.
///
/// `A` is the wrapped input source. The record-parsing methods are implemented for any `A`
/// that implements `std::io::BufRead`.
#[derive(Debug)]
pub struct MGFReader<A> {
    reader: A,
}

impl<R: std::io::Read> MGFReader<std::io::BufReader<R>> {
    /// Build an MGFReader on top of any `Read` source.
    ///
    /// # Arguments
    ///
    /// * `reader` - The byte source; it is wrapped in a `std::io::BufReader`.
    ///
    pub fn new(reader: R) -> Self {
        let reader = std::io::BufReader::new(reader);
        Self { reader }
    }
}
30
31impl<R> MGFReader<R>
32where
33    R: std::io::BufRead,
34{
35    pub fn spectra(self) -> Records<R> {
36        Records { reader: self }
37    }
38
39    /// Read from the underlying reader into spectrum.
40    ///
41    /// # Arguments
42    ///
43    /// * `s` - A spectrum object that will hold the new spectrum data.
44    ///
45    pub fn read(&mut self, s: &mut Spectrum) -> std::io::Result<()> {
46        let mut line = String::new();
47
48        s.metadata.clear();
49        s.mz.clear();
50        s.intensities.clear();
51
52        self.reader.read_line(&mut line)?;
53
54        if line.is_empty() {
55            return Ok(());
56        };
57
58        loop {
59            if line == "\n" {
60                line.clear();
61                self.reader.read_line(&mut line)?;
62                continue;
63            }
64
65            if line.is_empty() {
66                return Ok(());
67            }
68
69            if line == "BEGIN IONS\n" {
70                line.clear();
71                break;
72            }
73
74            if line != "BEGIN IONS\n" {
75                return Err(std::io::Error::new(
76                    ErrorKind::Other,
77                    format!("Expected 'BEGIN IONS' to start, got {}", line),
78                ));
79            }
80        }
81
82        loop {
83            self.reader.read_line(&mut line)?;
84
85            if line == "END IONS\n" {
86                break;
87            }
88
89            if line.contains('=') {
90                if let Some((k, v)) = line.trim().split_once("=") {
91                    s.metadata.insert(String::from(k), String::from(v));
92                } else {
93                    return Err(Error::new(
94                        ErrorKind::Other,
95                        "Could parse key value metadata.",
96                    ));
97                }
98                line.clear();
99            } else if line.contains('\t') {
100                if let Some((raw_mz, raw_intensity)) = line.trim().split_once("\t") {
101                    let new_mz: f64 = raw_mz.parse().unwrap();
102                    let new_intensity: f64 = raw_intensity.parse().unwrap();
103
104                    s.mz.push(new_mz);
105                    s.intensities.push(new_intensity);
106                } else {
107                    return Err(Error::new(ErrorKind::Other, "Vectors"));
108                }
109                line.clear();
110            } else {
111                return Err(Error::new(
112                    ErrorKind::Other,
113                    format!("Error parsing data: {}", line),
114                ));
115            }
116        }
117
118        Ok(())
119    }
120}
121
122#[derive(Debug)]
123pub struct MGFWriter<W: Write> {
124    writer: std::io::BufWriter<W>,
125    output_format: Format,
126}
127
128impl<W: Write> MGFWriter<W> {
129    /// Create a new MGFWriter object.
130    ///
131    /// # Arguments
132    ///
133    /// * `writer` - An object that can be written two.
134    ///
135    pub fn new(writer: W, output_format: Format) -> Self {
136        MGFWriter {
137            writer: std::io::BufWriter::new(writer),
138            output_format,
139        }
140    }
141
142    /// Write spectrum to the underlying buffer in the format.
143    ///
144    /// # Arguments
145    ///
146    /// * `spectrum` - The spectrum to write.
147    ///
148    pub fn write(&mut self, spectrum: Spectrum) -> std::io::Result<()> {
149        match &self.output_format {
150            Format::Mgf => self.write_mgf(spectrum),
151            Format::Json => self.write_json(spectrum),
152            e => Err(Error::new(
153                ErrorKind::Other,
154                format!("Cannot parse, got output: {:?}", e),
155            )),
156        }
157    }
158
159    /// Write spectrum to the underlying buffer in mgf format.
160    ///
161    /// # Arguments
162    ///
163    /// * `spectrum` - The spectrum to write.
164    ///
165    pub fn write_json(&mut self, spectrum: Spectrum) -> std::io::Result<()> {
166        let result = serde_json::to_writer(&mut self.writer, &spectrum);
167        self.writer.write_all(b"\n")?;
168
169        match result {
170            Ok(_) => Ok(()),
171            Err(_) => Err(Error::new(
172                ErrorKind::Other,
173                "Error writing json for spectrum.",
174            )),
175        }
176    }
177
178    /// Write spectrum to the underlying buffer in mgf format.
179    ///
180    /// # Arguments
181    ///
182    /// * `spectrum` - The spectrum to write.
183    ///
184    pub fn write_mgf(&mut self, spectrum: Spectrum) -> std::io::Result<()> {
185        self.writer.write_all(b"BEGIN IONS\n")?;
186
187        for (k, v) in spectrum.metadata.iter() {
188            let metadata = format!("{}={}\n", k, v);
189            self.writer.write_all(metadata.as_bytes())?;
190        }
191
192        let mz_iter = spectrum.mz.iter();
193        let intensities_iter = spectrum.intensities.iter();
194
195        for (m, i) in mz_iter.zip(intensities_iter) {
196            let line = format!("{}\t{}\n", m, i);
197            self.writer.write_all(line.as_bytes())?;
198        }
199
200        self.writer.write_all(b"END IONS\n")?;
201
202        Ok(())
203    }
204}
205
206pub struct Records<R>
207where
208    R: std::io::BufRead,
209{
210    reader: MGFReader<R>,
211}
212
213impl<R> Iterator for Records<R>
214where
215    R: std::io::BufRead,
216{
217    type Item = std::io::Result<Spectrum>;
218
219    /// Implements the next ethod for the Records iterator.
220    fn next(&mut self) -> Option<Self::Item> {
221        let mut record = Spectrum::empty();
222
223        let resp = self.reader.read(&mut record);
224        match resp {
225            Ok(()) if record.is_empty() => None,
226            Ok(()) => Some(Ok(record)),
227            Err(e) => Some(Err(std::io::Error::new(std::io::ErrorKind::Other, e))),
228        }
229    }
230}
231
#[cfg(test)]
mod tests {
    use super::*;

    // Three records separated by runs of blank lines: metadata plus two peaks, metadata
    // only, and a single peak only. Each `\n\` ends a line and continues the literal.
    const MGF_FILE_SIMPLE: &[u8] = b"\
        BEGIN IONS\n\
        PEPMASS=898.727\n\
        SCANS=1\n\
        13.00\t1.0\n\
        14.00\t1.0\n\
        END IONS\n\
        \n\
        BEGIN IONS\n\
        PEPMASS=898.727\n\
        SCANS=1\n\
        END IONS\n\
        \n\
        \n\
        \n\
        \n\
        BEGIN IONS\n\
        13.00\t1.0\n\
        END IONS\n\
        \n\
        \n";

    // JSON form of the spectra expected from `MGF_FILE_SIMPLE`, in file order.
    const SPECTRUM_SIMPLE: &str = r#"
    [
        {
            "metadata": {"PEPMASS": "898.727", "SCANS": "1"},
            "mz": [13.0, 14.0],
            "intensities": [1.0, 1.0]
        },
        {
            "metadata": {"PEPMASS": "898.727", "SCANS": "1"},
            "mz": [],
            "intensities": []
        },
        {
            "metadata": {},
            "mz": [13.0],
            "intensities": [1.0]
        }
    ]
    "#;

    /// Parsing the MGF fixture must yield exactly the spectra described by the JSON fixture.
    #[test]
    fn test_reader() {
        let expected: Vec<Spectrum> = serde_json::from_str(SPECTRUM_SIMPLE).unwrap();

        let parsed: Vec<Spectrum> = MGFReader::new(MGF_FILE_SIMPLE)
            .spectra()
            .map(|spectrum| spectrum.unwrap())
            .collect();

        assert_eq!(expected, parsed);
    }
}