1#![warn(missing_docs)]
2#![warn(rustdoc::missing_doc_code_examples)]
3
4pub mod error;
19pub mod imzml;
21pub mod mzml;
23
24pub mod obo;
26pub mod validation;
28use std::io::ErrorKind;
31use std::io::Read;
32use std::io::Seek;
33use std::io::SeekFrom;
34use std::sync::Arc;
35use std::sync::Mutex;
36
37use byteorder::LittleEndian;
38use byteorder::ReadBytesExt;
39use error::DataError;
40use error::FatalParseError;
41use error::ParseError;
42use mzml::Tag;
43
44pub use self::imzml::ImzML;
45pub use self::imzml::ImzMLReader;
46pub use mzml::mzml::MzML;
47pub use mzml::MzMLReader;
48
49pub use self::mzml::binarydataarray::*;
50pub use self::mzml::mzml::*;
51pub use self::mzml::scan::*;
53pub use self::mzml::spectrum::*;
54
55#[macro_use]
56extern crate lazy_static;
57
/// Buffer size constant, `0x1000` (4096).
///
/// NOTE(review): not referenced in the visible part of this file; presumably a byte count for
/// I/O buffers used elsewhere in the crate — confirm before relying on it.
const BUFFER_SIZE: usize = 0x1000;
59
/// Describes where one binary data array is stored inside a shared, seekable data source and
/// how its bytes have to be interpreted.
///
/// The source `D` is held behind an `Arc<Mutex<_>>`, so several `DataAccess` values can refer to
/// the same underlying reader.
#[derive(Debug)]
pub struct DataAccess<D: Read + Seek> {
    /// Shared, mutex-guarded source that the array bytes are read from.
    data_location: Arc<Mutex<D>>,

    /// Expected number of values in the array, when known. `as_f64` only uses it to pre-size
    /// its output vector.
    array_length: Option<u64>,
    /// Number of bytes read from `data_location`, starting at `offset`.
    encoded_length: u64,
    /// Position of the first byte of the array, measured from the start of `data_location`.
    offset: u64,

    /// Encoding, compression and element type of the stored bytes.
    data_description: DataDescription,
}
72
/// A spectrum description bundled with optional, already decoded data arrays held in memory.
///
/// NOTE(review): the name suggests this is the input for writing a spectrum; no writer code is
/// visible in this part of the file — confirm against the writer module.
pub struct WritableSpectrum {
    /// The spectrum description the arrays belong to.
    pub spectrum: Spectrum,
    /// Values of the spectrum's `mz_array`, if any.
    pub mz_array: Option<Vec<f64>>,
    /// Values of the spectrum's `intensity_array`, if any.
    pub intensity_array: Option<Vec<f64>>,
}
82
83impl<D: Read + Seek> DataAccess<D> {
109 pub fn from_binary_data_array(
111 bda: &BinaryDataArray,
112 data_location: Arc<Mutex<D>>,
113 ) -> Option<Self> {
114 if let (Some(offset), Some(encoded_length)) = (bda.offset(), bda.encoded_length()) {
115 Some(DataAccess {
116 data_location,
117 encoded_length,
118 offset,
119 array_length: bda.array_length(),
120 data_description: DataDescription {
121 is_base64_encoded: !bda.is_data_external(),
122 compression: bda.compression(),
123 binary_type: bda.binary_type(),
124 },
125 })
126 } else {
127 None
128 }
129 }
130
131 fn reader(&self) -> Result<CompressionReader, DataError> {
132 let mut reader = self.data_location.lock()?;
133
134 reader.seek(SeekFrom::Start(self.offset))?;
135 let mut buf = vec![0; self.encoded_length as usize];
136 reader.read_exact(&mut buf)?;
137
138 drop(reader);
140
141 if self.data_description.is_base64_encoded {
142 buf = base64::decode(&buf)?;
144 }
145
146 Ok(self.data_description.compression.to_reader(buf))
147 }
148
149 pub fn offset(&self) -> u64 {
151 self.offset
152 }
153
154 pub fn encoded_length(&self) -> u64 {
156 self.encoded_length
157 }
158
159 pub fn binary_type(&self) -> BinaryDataType {
161 self.data_description.binary_type()
162 }
163
164 pub fn as_f64(&self) -> Result<Vec<f64>, DataError> {
166 let mut reader = self.reader()?;
167
168 let mut result = match self.array_length {
169 Some(array_length) => Vec::with_capacity(array_length as usize),
170 None => Vec::new(),
171 };
172
173 match self.data_description.binary_type {
174 BinaryDataType::Undefined => Err(DataError::UnknownDataType),
175 BinaryDataType::Float64 => {
176 loop {
177 match reader.read_f64::<LittleEndian>() {
178 Ok(value) => result.push(value),
179 Err(error) => {
180 if error.kind() == ErrorKind::UnexpectedEof {
181 break;
182 }
183
184 return Err(DataError::IOError(error));
185 }
186 }
187 }
188
189 Ok(result)
190 }
191 BinaryDataType::Float32 => {
192 loop {
193 match reader.read_f32::<LittleEndian>() {
194 Ok(value) => result.push(value as f64),
195 Err(error) => {
196 if error.kind() == ErrorKind::UnexpectedEof {
197 break;
198 }
199
200 return Err(DataError::IOError(error));
201 }
202 }
203 }
204
205 Ok(result)
206 }
207 }
208 }
209}
210
/// Pairs a spectrum description with accessors for its binary data arrays.
///
/// Each accessor is `None` when the spectrum has no such array or when the array's location
/// could not be determined (see `DataAccess::from_binary_data_array`).
pub struct SpectrumAccess<D: Read + Seek> {
    /// The shared spectrum description.
    spectrum: Arc<Spectrum>,

    /// Accessor built from the spectrum's `mz_array`, if available.
    mz_array: Option<DataAccess<D>>,
    /// Accessor built from the spectrum's `intensity_array`, if available.
    intensity_array: Option<DataAccess<D>>,
}
224
225impl<D: Read + Seek> SpectrumAccess<D> {
244 pub fn new(data_location: Arc<Mutex<D>>, spectrum: Arc<Spectrum>) -> Self {
246 let mz_array = match spectrum.mz_array() {
247 Some(mz_array) => DataAccess::from_binary_data_array(mz_array, data_location.clone()),
248 None => None,
249 };
250 let intensity_array = match spectrum.intensity_array() {
251 Some(intensity_array) => {
252 DataAccess::from_binary_data_array(intensity_array, data_location)
253 }
254 None => None,
255 };
256
257 Self {
258 spectrum,
259 mz_array,
260 intensity_array,
261 }
262 }
263
264 pub fn mz_array(&self) -> Option<&DataAccess<D>> {
266 self.mz_array.as_ref()
267 }
268
269 pub fn intensity_array(&self) -> Option<&DataAccess<D>> {
271 self.intensity_array.as_ref()
272 }
273
274 pub fn spectrum(&self) -> &Spectrum {
276 &self.spectrum
277 }
278
279 pub fn representation(&self) -> Option<Representation> {
281 self.spectrum.representation()
282 }
283
284 pub fn polarity(&self) -> Option<Polarity> {
286 self.spectrum.polarity()
287 }
288
289 pub fn coordinate(&self) -> Option<Coordinate> {
291 let x = self.spectrum.x_position()?;
292 let y = self.spectrum.y_position()?;
293
294 Some(Coordinate { x, y, z: None })
295 }
296}
297
/// Either "no location" or a concrete coordinate for a scan.
///
/// NOTE(review): no use of this enum is visible in this part of the file; the variant
/// descriptions below are inferred from their names — confirm against the callers.
pub enum ScanLocation {
    /// Presumably marks a scan whose location should be disregarded.
    Ignore,
    /// A scan located at the contained coordinate.
    Location(Coordinate),
}
306
/// A position made of `x`, `y` and an optional `z` component.
///
/// `SpectrumAccess::coordinate` builds it from a spectrum's x and y position and leaves `z`
/// unset.
#[derive(Debug, Hash, PartialEq, Eq)]
pub struct Coordinate {
    /// The x component.
    pub x: u32,
    /// The y component.
    pub y: u32,
    /// The z component, if there is one.
    pub z: Option<u32>,
}
317
/// How the bytes of a binary data array are encoded.
#[derive(Debug)]
struct DataDescription {
    /// Whether the raw bytes must be base64-decoded before further processing.
    is_base64_encoded: bool,

    /// Compression of the (decoded) bytes; provides the reader used to decompress them.
    compression: Compression,
    /// Element type of the values, used to decide how they are read.
    binary_type: BinaryDataType,
}
325
326impl Default for DataDescription {
327 fn default() -> Self {
328 Self {
329 is_base64_encoded: false,
330 compression: Compression::None,
331 binary_type: BinaryDataType::Float64,
332 }
333 }
334}
335
impl DataDescription {
    /// Returns the element type recorded in this description.
    pub fn binary_type(&self) -> BinaryDataType {
        self.binary_type
    }
}
341
/// Iterator that yields a `SpectrumAccess` for every spectrum of a spectrum list, in order.
///
/// All yielded items share the same `data_location`.
pub struct SpectrumAccessIterator<D: Read + Seek> {
    /// Shared source handed to every `SpectrumAccess` that is created.
    data_location: Arc<Mutex<D>>,
    /// The spectra to iterate over, copied from the spectrum list on construction.
    spectra: Vec<Arc<Spectrum>>,

    /// Index into `spectra` of the item the next call to `next` looks at.
    next_index: usize,
}
367
368impl<D: Read + Seek> SpectrumAccessIterator<D> {
379 fn new(data_location: Arc<Mutex<D>>, spectrum_list: &SpectrumList) -> Self {
380 Self {
381 data_location,
382 spectra: spectrum_list.spectra().to_vec(),
383 next_index: 0,
384 }
385 }
386}
387
388impl<D: Read + Seek> Iterator for SpectrumAccessIterator<D> {
389 type Item = SpectrumAccess<D>;
390
391 fn next(&mut self) -> Option<Self::Item> {
392 let cur_index = self.next_index;
393 self.next_index += 1;
394
395 self.spectra
396 .get(cur_index)
397 .map(|spectrum| SpectrumAccess::new(self.data_location.clone(), spectrum.clone()))
398 }
399}
400
#[cfg(test)]
mod tests {
    use crate::imzml::ImzMLReader;
    use crate::mzml::writer::Writer;
    use crate::mzml::{DataReader, MzMLReader, MzMLTag};

    use std::fmt::Display;
    use std::fs::File;
    use std::io::{BufReader, BufWriter};
    use std::path::Path;
    use std::time::Instant;

    /// Machine-specific directory scanned by the ignored `test_all_in_folder` test.
    const LOCAL_IMZML_DIR: &str =
        "/home/alan/Documents/GitProjects/gomsi/metaspace/cmd/metaspacedownloader/output/";

    /// Machine-specific imzML file used by the ignored `test_obo` and `test_parser` tests.
    const LOCAL_IMZML_FILE: &str = "/home/alan/Documents/GitProjects/gomsi/metaspace/cmd/metaspacedownloader/output/2017-06-30_07h26m26s_Mousebrain_MG08_2017_GruppeF.imzML";

    /// Returns the extension of `path` as UTF-8, or `None` if there is no extension or it is
    /// not valid UTF-8 (e.g. directories, `README`, `.gitignore`).
    fn utf8_extension(path: &Path) -> Option<&str> {
        path.extension().and_then(|extension| extension.to_str())
    }

    /// Prints every error of `errors` on its own line.
    fn print_errors<I>(errors: I)
    where
        I: IntoIterator,
        I::Item: Display,
    {
        for error in errors {
            println!("{}", error);
        }
    }

    /// Parses every `.imzML` and `.mzML` file in `../test/` and prints timing and parse errors.
    #[test]
    fn test_all_in_test_folder() -> std::io::Result<()> {
        for entry in std::fs::read_dir("../test/")? {
            let path = entry?.path();

            // Entries without a usable extension are skipped instead of aborting the test.
            let is_imzml = match utf8_extension(&path) {
                Some("imzML") => true,
                Some("mzML") => false,
                _ => continue,
            };

            println!("Processing {:?}", path);

            let start = Instant::now();

            if is_imzml {
                let parser = ImzMLReader::from_path(path).unwrap();

                let duration = start.elapsed();
                println!("Time elapsed when parsing imzML is: {:?}", duration);

                print_errors(parser.errors());
            } else {
                let parser = MzMLReader::from_path(path).unwrap();

                let duration = start.elapsed();
                println!("Time elapsed when parsing mzML is: {:?}", duration);

                print_errors(parser.errors());
            }
        }

        Ok(())
    }

    /// Opens every `.mzML` file in `../test/` with `DataReader` and prints the first values
    /// of the first spectrum's `mz_array`.
    #[test]
    fn read_mzml_in_test_folder() -> std::io::Result<()> {
        for entry in std::fs::read_dir("../test/")? {
            let path = entry?.path();

            if utf8_extension(&path) != Some("mzML") {
                continue;
            }

            println!("Processing [data] {:?}", path);

            let start = Instant::now();

            let parser = DataReader::from_path(path).unwrap();
            let duration = start.elapsed();
            println!("Time elapsed when parsing mzML is: {:?}", duration);

            let spectrum = parser.spectrum(0).unwrap();

            let mzs = spectrum.mz_array().unwrap().as_f64().unwrap();

            // Print at most ten values; shorter arrays must not panic on the slice bound.
            let preview_length = mzs.len().min(10);
            println!("{:?}", &mzs[..preview_length]);
        }

        Ok(())
    }

    /// Parses the header of every `.imzML` file in a local, machine-specific directory.
    #[ignore]
    #[test]
    fn test_all_in_folder() -> std::io::Result<()> {
        for entry in std::fs::read_dir(LOCAL_IMZML_DIR)? {
            let path = entry?.path();

            if utf8_extension(&path) != Some("imzML") {
                println!("Skipping {:?}.", path);
                continue;
            }

            println!("Processing {:?}", path);

            let start = Instant::now();

            let file = File::open(&path)?;
            let header_reader = BufReader::new(file);

            let parser = MzMLReader::new(header_reader).unwrap();
            let duration = start.elapsed();
            println!("Time elapsed when parsing imzML is: {:?}", duration);

            print_errors(parser.errors());
        }

        Ok(())
    }

    /// Parses a local imzML file with an ontology, validates it against both mapping files and
    /// writes the parsed document to `tmp.xml`.
    #[ignore]
    #[test]
    fn test_obo() {
        let start = Instant::now();
        let ontology = crate::obo::parser::parse("imagingMS.obo").unwrap();
        let duration = start.elapsed();
        println!("Time elapsed when parsing ontology is: {:?}", duration);

        println!("[Parser] Reading with ontology");
        // Restart the clock so the imzML timing does not include the ontology parse above.
        let start = Instant::now();
        let parser =
            ImzMLReader::from_path_with_ontology(LOCAL_IMZML_FILE, ontology.clone()).unwrap();

        let duration = start.elapsed();
        println!("Time elapsed when parsing imzML is: {:?}", duration);

        print_errors(parser.errors());

        let imzml = parser.mzml().unwrap();

        let mut errors = Vec::new();

        let mapping = crate::validation::parse("ms-mapping.xml").unwrap();
        mapping.validate(&ontology, imzml, &mut errors);

        let mapping = crate::validation::parse("Ims1.1-mapping.xml").unwrap();
        mapping.validate(&ontology, imzml, &mut errors);

        // Surface the validation outcome instead of silently dropping it.
        println!("Validation reported {} issue(s)", errors.len());

        let file = File::create("tmp.xml").unwrap();

        let mut writer = Writer::new(BufWriter::new(file)).unwrap();
        parser.mzml().unwrap().write_xml(&mut writer).unwrap();
    }

    /// Parses a local imzML file, writes it to `tmp.xml` and prints a few parsed details.
    #[test]
    #[ignore]
    fn test_parser() {
        let start = Instant::now();
        let parser = ImzMLReader::from_path(LOCAL_IMZML_FILE).unwrap();
        let duration = start.elapsed();

        print_errors(parser.errors());

        let file = File::create("tmp.xml").unwrap();

        let mut writer = Writer::new(BufWriter::new(file)).unwrap();
        parser.mzml().unwrap().write_xml(&mut writer).unwrap();

        println!("Time elapsed when parsing Mousebrain is: {:?}", duration);

        println!("{:?}", parser.mzml().unwrap().spectrum(0));
        println!(
            "{:?}",
            parser.mzml().unwrap().spectrum(0).unwrap().x_position()
        );
        println!(
            "{:?}",
            parser
                .mzml()
                .unwrap()
                .referenceable_param_group_ref("mzArray")
        );
    }
}