1use std::io::{Error, ErrorKind, Write};
5
6use crate::io::Format;
7use crate::spectrum::Spectrum;
8
/// Reader for spectra stored in Mascot Generic Format (MGF) text.
///
/// The type only holds the input source `R`; the parsing methods are
/// provided by the `impl` blocks for sources that implement
/// [`std::io::BufRead`].
#[derive(Debug)]
pub struct MGFReader<R> {
    /// Input source the MGF text is read from.
    reader: R,
}
13
14impl<R> MGFReader<std::io::BufReader<R>>
15where
16 R: std::io::Read,
17{
18 pub fn new(reader: R) -> Self {
25 Self {
26 reader: std::io::BufReader::new(reader),
27 }
28 }
29}
30
31impl<R> MGFReader<R>
32where
33 R: std::io::BufRead,
34{
35 pub fn spectra(self) -> Records<R> {
36 Records { reader: self }
37 }
38
39 pub fn read(&mut self, s: &mut Spectrum) -> std::io::Result<()> {
46 let mut line = String::new();
47
48 s.metadata.clear();
49 s.mz.clear();
50 s.intensities.clear();
51
52 self.reader.read_line(&mut line)?;
53
54 if line.is_empty() {
55 return Ok(());
56 };
57
58 loop {
59 if line == "\n" {
60 line.clear();
61 self.reader.read_line(&mut line)?;
62 continue;
63 }
64
65 if line.is_empty() {
66 return Ok(());
67 }
68
69 if line == "BEGIN IONS\n" {
70 line.clear();
71 break;
72 }
73
74 if line != "BEGIN IONS\n" {
75 return Err(std::io::Error::new(
76 ErrorKind::Other,
77 format!("Expected 'BEGIN IONS' to start, got {}", line),
78 ));
79 }
80 }
81
82 loop {
83 self.reader.read_line(&mut line)?;
84
85 if line == "END IONS\n" {
86 break;
87 }
88
89 if line.contains('=') {
90 if let Some((k, v)) = line.trim().split_once("=") {
91 s.metadata.insert(String::from(k), String::from(v));
92 } else {
93 return Err(Error::new(
94 ErrorKind::Other,
95 "Could parse key value metadata.",
96 ));
97 }
98 line.clear();
99 } else if line.contains('\t') {
100 if let Some((raw_mz, raw_intensity)) = line.trim().split_once("\t") {
101 let new_mz: f64 = raw_mz.parse().unwrap();
102 let new_intensity: f64 = raw_intensity.parse().unwrap();
103
104 s.mz.push(new_mz);
105 s.intensities.push(new_intensity);
106 } else {
107 return Err(Error::new(ErrorKind::Other, "Vectors"));
108 }
109 line.clear();
110 } else {
111 return Err(Error::new(
112 ErrorKind::Other,
113 format!("Error parsing data: {}", line),
114 ));
115 }
116 }
117
118 Ok(())
119 }
120}
121
122#[derive(Debug)]
123pub struct MGFWriter<W: Write> {
124 writer: std::io::BufWriter<W>,
125 output_format: Format,
126}
127
128impl<W: Write> MGFWriter<W> {
129 pub fn new(writer: W, output_format: Format) -> Self {
136 MGFWriter {
137 writer: std::io::BufWriter::new(writer),
138 output_format,
139 }
140 }
141
142 pub fn write(&mut self, spectrum: Spectrum) -> std::io::Result<()> {
149 match &self.output_format {
150 Format::Mgf => self.write_mgf(spectrum),
151 Format::Json => self.write_json(spectrum),
152 e => Err(Error::new(
153 ErrorKind::Other,
154 format!("Cannot parse, got output: {:?}", e),
155 )),
156 }
157 }
158
159 pub fn write_json(&mut self, spectrum: Spectrum) -> std::io::Result<()> {
166 let result = serde_json::to_writer(&mut self.writer, &spectrum);
167 self.writer.write_all(b"\n")?;
168
169 match result {
170 Ok(_) => Ok(()),
171 Err(_) => Err(Error::new(
172 ErrorKind::Other,
173 "Error writing json for spectrum.",
174 )),
175 }
176 }
177
178 pub fn write_mgf(&mut self, spectrum: Spectrum) -> std::io::Result<()> {
185 self.writer.write_all(b"BEGIN IONS\n")?;
186
187 for (k, v) in spectrum.metadata.iter() {
188 let metadata = format!("{}={}\n", k, v);
189 self.writer.write_all(metadata.as_bytes())?;
190 }
191
192 let mz_iter = spectrum.mz.iter();
193 let intensities_iter = spectrum.intensities.iter();
194
195 for (m, i) in mz_iter.zip(intensities_iter) {
196 let line = format!("{}\t{}\n", m, i);
197 self.writer.write_all(line.as_bytes())?;
198 }
199
200 self.writer.write_all(b"END IONS\n")?;
201
202 Ok(())
203 }
204}
205
206pub struct Records<R>
207where
208 R: std::io::BufRead,
209{
210 reader: MGFReader<R>,
211}
212
213impl<R> Iterator for Records<R>
214where
215 R: std::io::BufRead,
216{
217 type Item = std::io::Result<Spectrum>;
218
219 fn next(&mut self) -> Option<Self::Item> {
221 let mut record = Spectrum::empty();
222
223 let resp = self.reader.read(&mut record);
224 match resp {
225 Ok(()) if record.is_empty() => None,
226 Ok(()) => Some(Ok(record)),
227 Err(e) => Some(Err(std::io::Error::new(std::io::ErrorKind::Other, e))),
228 }
229 }
230}
231
#[cfg(test)]
mod tests {
    use super::*;

    // Three records separated by varying numbers of blank lines. Tabs and
    // newlines are spelled as escapes so the fixture survives editors and
    // formatters that rewrite whitespace; the trailing `\` joins the source
    // lines and drops the indentation that follows.
    const MGF_FILE_SIMPLE: &[u8] = b"BEGIN IONS\n\
        PEPMASS=898.727\n\
        SCANS=1\n\
        13.00\t1.0\n\
        14.00\t1.0\n\
        END IONS\n\
        \n\
        BEGIN IONS\n\
        PEPMASS=898.727\n\
        SCANS=1\n\
        END IONS\n\
        \n\
        \n\
        \n\
        \n\
        BEGIN IONS\n\
        13.00\t1.0\n\
        END IONS\n\
        \n\
        \n";

    // Expected parse result for `MGF_FILE_SIMPLE`, one object per record.
    const SPECTRUM_SIMPLE: &str = r#"
    [
        {
            "metadata": {"PEPMASS": "898.727", "SCANS": "1"},
            "mz": [13.0, 14.0],
            "intensities": [1.0, 1.0]
        },
        {
            "metadata": {"PEPMASS": "898.727", "SCANS": "1"},
            "mz": [],
            "intensities": []
        },
        {
            "metadata": {},
            "mz": [13.0],
            "intensities": [1.0]
        }
    ]
    "#;

    /// All three records are parsed and blank lines between them are skipped.
    #[test]
    fn test_reader() {
        let test_s: Vec<Spectrum> = serde_json::from_str(SPECTRUM_SIMPLE).unwrap();

        let reader = MGFReader::new(MGF_FILE_SIMPLE);
        let spectra = reader.spectra();

        let filter_s: Vec<Spectrum> = spectra.map(|s| s.unwrap()).collect();

        assert_eq!(test_s, filter_s);
    }

    /// Empty input produces no spectra and no error.
    #[test]
    fn test_reader_empty_input() {
        let reader = MGFReader::new(&b""[..]);

        assert_eq!(reader.spectra().count(), 0);
    }

    /// Content that does not start with `BEGIN IONS` is reported as an error.
    #[test]
    fn test_reader_rejects_missing_begin() {
        let mut spectra = MGFReader::new(&b"PEPMASS=898.727\n"[..]).spectra();

        assert!(matches!(spectra.next(), Some(Err(_))));
    }
}