imzml/
lib.rs

1#![warn(missing_docs)]
2#![warn(rustdoc::missing_doc_code_examples)]
3
4//! This library provides a means of accessing mass spectrometry and mass spectrometry imaging data stored in either the
5//! .mzML or .imzML data formats.
6//!
7//! let parser = ImzMLReader::from_path("/path/to/data.imzML").unwrap();
8//!
9//! for error in parser.errors() {
10//!     println!("{:?}", error);
11//! }
12//!
13//! let imzml: ImzML<_> = parser.into();
14//!
15//! imzml.ion_image(772.573, 100.0);
16
17/// Error handling
18pub mod error;
19/// ImzML file parsing (relies heavily on `mzml`)
20pub mod imzml;
21/// MzML file parsing
22pub mod mzml;
23
24/// Parsing ontologies stored in the OBO format
25pub mod obo;
26/// Validating .mzML and .imzML files
27pub mod validation;
28//mod writer;
29
30use std::io::ErrorKind;
31use std::io::Read;
32use std::io::Seek;
33use std::io::SeekFrom;
34use std::sync::Arc;
35use std::sync::Mutex;
36
37use byteorder::LittleEndian;
38use byteorder::ReadBytesExt;
39use error::DataError;
40use error::FatalParseError;
41use error::ParseError;
42use mzml::Tag;
43
44pub use self::imzml::ImzML;
45pub use self::imzml::ImzMLReader;
46pub use mzml::mzml::MzML;
47pub use mzml::MzMLReader;
48
49pub use self::mzml::binarydataarray::*;
50pub use self::mzml::mzml::*;
51//pub use self::parser::*;
52pub use self::mzml::scan::*;
53pub use self::mzml::spectrum::*;
54
55#[macro_use]
56extern crate lazy_static;
57
58const BUFFER_SIZE: usize = 0x1000;
59
60/// Enables access to data stored in `data_location`. The data could be compressed and/or encoded
61/// and stored with various different types. This information is captured in the `data_description`
62#[derive(Debug)]
63pub struct DataAccess<D: Read + Seek> {
64    data_location: Arc<Mutex<D>>,
65
66    array_length: Option<u64>,
67    encoded_length: u64,
68    offset: u64,
69
70    data_description: DataDescription,
71}
72
73/// WritableSpectrum contains information required to be able to write out a spectrum to imzML.
74pub struct WritableSpectrum {
75    /// Spectrum metadata
76    pub spectrum: Spectrum,
77    /// m/z values
78    pub mz_array: Option<Vec<f64>>,
79    /// Intensity values
80    pub intensity_array: Option<Vec<f64>>,
81}
82
83// impl<'obo> WritableSpectrum<'obo> {
84//     fn description(&mut self) -> &mut Spectrum<'obo> {
85//         &mut self.spectrum
86//     }
87
88//     fn mz_array(&self) -> Option<&Vec<f64>> {
89//         self.mz_array.as_ref()
90//     }
91
92//     fn intensity_array(&self) -> Option<&Vec<f64>> {
93//         self.intensity_array.as_ref()
94//     }
95// }
96// pub trait DataAccess {
97//     fn as_f64(&self) -> Result<Vec<f64>, DataError>;
98// }
99
100// pub trait SpectrumAccess<'obo> {
101//     type DataAccess: DataAccess;
102
103//     fn spectrum(&self) -> &Spectrum<'obo>;
104//     fn mz_array(&self) -> Option<&Self::DataAccess>;
105//     fn intensity_array(&self) -> Option<&Self::DataAccess>;
106// }
107
108impl<D: Read + Seek> DataAccess<D> {
109    /// Create a DataAccess instance from a `BinaryDataArray`, using the specified `data_location`
110    pub fn from_binary_data_array(
111        bda: &BinaryDataArray,
112        data_location: Arc<Mutex<D>>,
113    ) -> Option<Self> {
114        if let (Some(offset), Some(encoded_length)) = (bda.offset(), bda.encoded_length()) {
115            Some(DataAccess {
116                data_location,
117                encoded_length,
118                offset,
119                array_length: bda.array_length(),
120                data_description: DataDescription {
121                    is_base64_encoded: !bda.is_data_external(),
122                    compression: bda.compression(),
123                    binary_type: bda.binary_type(),
124                },
125            })
126        } else {
127            None
128        }
129    }
130
131    fn reader(&self) -> Result<CompressionReader, DataError> {
132        let mut reader = self.data_location.lock()?;
133
134        reader.seek(SeekFrom::Start(self.offset))?;
135        let mut buf = vec![0; self.encoded_length as usize];
136        reader.read_exact(&mut buf)?;
137
138        // We can free the guard now before we move on to decoding
139        drop(reader);
140
141        if self.data_description.is_base64_encoded {
142            // Base64 decoding
143            buf = base64::decode(&buf)?;
144        }
145
146        Ok(self.data_description.compression.to_reader(buf))
147    }
148
149    /// Return the offset (in the file) of the data
150    pub fn offset(&self) -> u64 {
151        self.offset
152    }
153
154    /// Return the length (in bytes) of the data in the file
155    pub fn encoded_length(&self) -> u64 {
156        self.encoded_length
157    }
158
159    /// Returns the binary type of the data (e.g. 64-bit float)
160    pub fn binary_type(&self) -> BinaryDataType {
161        self.data_description.binary_type()
162    }
163
164    /// Decode, decompress and convert (if necessary) data points to f64
165    pub fn as_f64(&self) -> Result<Vec<f64>, DataError> {
166        let mut reader = self.reader()?;
167
168        let mut result = match self.array_length {
169            Some(array_length) => Vec::with_capacity(array_length as usize),
170            None => Vec::new(),
171        };
172
173        match self.data_description.binary_type {
174            BinaryDataType::Undefined => Err(DataError::UnknownDataType),
175            BinaryDataType::Float64 => {
176                loop {
177                    match reader.read_f64::<LittleEndian>() {
178                        Ok(value) => result.push(value),
179                        Err(error) => {
180                            if error.kind() == ErrorKind::UnexpectedEof {
181                                break;
182                            }
183
184                            return Err(DataError::IOError(error));
185                        }
186                    }
187                }
188
189                Ok(result)
190            }
191            BinaryDataType::Float32 => {
192                loop {
193                    match reader.read_f32::<LittleEndian>() {
194                        Ok(value) => result.push(value as f64),
195                        Err(error) => {
196                            if error.kind() == ErrorKind::UnexpectedEof {
197                                break;
198                            }
199
200                            return Err(DataError::IOError(error));
201                        }
202                    }
203                }
204
205                Ok(result)
206            }
207        }
208    }
209}
210
211// #[derive(Debug)]
212// pub enum DataAccess<D: BufRead + Seek> {
213//     OnDisk(OnDiskAccess<D>),
214//     InMemory(Vec<u8>),
215// }
216
217/// Provide ability to access a spectrum (contains `DataAccess` for both m/z and intensity arrays)
218pub struct SpectrumAccess<D: Read + Seek> {
219    spectrum: Arc<Spectrum>,
220
221    mz_array: Option<DataAccess<D>>,
222    intensity_array: Option<DataAccess<D>>,
223}
224
225// impl<'obo, D: BufRead + Seek> SpectrumAccess<'obo, D> {
226//     type DataAccess = DataAccess<D>;
227
228//     /// Returns the `DataAccess` for the m/z array
229//     fn mz_array(&self) -> Option<&DataAccess<D>> {
230//         self.mz_array.as_ref()
231//     }
232
233//     /// Returns the `DataAccess` for the intensity array
234//     fn intensity_array(&self) -> Option<&DataAccess<D>> {
235//         self.intensity_array.as_ref()
236//     }
237
238//     fn spectrum(&self) -> &Spectrum<'obo> {
239//         &self.spectrum
240//     }
241// }
242
243impl<D: Read + Seek> SpectrumAccess<D> {
244    /// Creates a new `SpectrumAccess` from the specified `data_location` and `Spectrum`
245    pub fn new(data_location: Arc<Mutex<D>>, spectrum: Arc<Spectrum>) -> Self {
246        let mz_array = match spectrum.mz_array() {
247            Some(mz_array) => DataAccess::from_binary_data_array(mz_array, data_location.clone()),
248            None => None,
249        };
250        let intensity_array = match spectrum.intensity_array() {
251            Some(intensity_array) => {
252                DataAccess::from_binary_data_array(intensity_array, data_location)
253            }
254            None => None,
255        };
256
257        Self {
258            spectrum,
259            mz_array,
260            intensity_array,
261        }
262    }
263
264    /// Returns the `DataAccess` for the m/z array
265    pub fn mz_array(&self) -> Option<&DataAccess<D>> {
266        self.mz_array.as_ref()
267    }
268
269    /// Returns the `DataAccess` for the intensity array
270    pub fn intensity_array(&self) -> Option<&DataAccess<D>> {
271        self.intensity_array.as_ref()
272    }
273
274    /// Returns the spectrum metadata
275    pub fn spectrum(&self) -> &Spectrum {
276        &self.spectrum
277    }
278
279    /// Returns the spectral representation (centroid, profile) of this spectrum
280    pub fn representation(&self) -> Option<Representation> {
281        self.spectrum.representation()
282    }
283
284    /// Returns the polarity of this spectrum
285    pub fn polarity(&self) -> Option<Polarity> {
286        self.spectrum.polarity()
287    }
288
289    /// Returns the pixel coordinate of the spectrum
290    pub fn coordinate(&self) -> Option<Coordinate> {
291        let x = self.spectrum.x_position()?;
292        let y = self.spectrum.y_position()?;
293
294        Some(Coordinate { x, y, z: None })
295    }
296}
297
298/// ScanLocation describes how ImzMLWriter should deal with a particular scan.
299/// Either ignore it, or store it with the specified coordinate.
300pub enum ScanLocation {
301    /// Ignore the scan
302    Ignore,
303    /// Store the scan with the specified coordinate.
304    Location(Coordinate),
305}
306
307/// Coordinate describes the pixel location of a spectrum/scan
308#[derive(Debug, Hash, PartialEq, Eq)]
309pub struct Coordinate {
310    /// x-axis coordinate
311    pub x: u32,
312    /// y-axis coordinate
313    pub y: u32,
314    /// Optional z-axis coordinate
315    pub z: Option<u32>,
316}
317
318#[derive(Debug)]
319struct DataDescription {
320    is_base64_encoded: bool,
321
322    compression: Compression,
323    binary_type: BinaryDataType,
324}
325
326impl Default for DataDescription {
327    fn default() -> Self {
328        Self {
329            is_base64_encoded: false,
330            compression: Compression::None,
331            binary_type: BinaryDataType::Float64,
332        }
333    }
334}
335
336impl DataDescription {
337    pub fn binary_type(&self) -> BinaryDataType {
338        self.binary_type
339    }
340}
341
342// impl<'obo, 'iter, B: BufRead, D: BufRead + Seek> Iterator for &ImzMLWriter<'obo, 'iter, B, D>  {
343//     type Item = Coordinate;
344
345//     fn next(&mut self) -> Option<Self::Item> {
346//         self.coord_iterator.lock().unwrap().next()
347//     }
348// }
349
350// pub trait SpectrumAccessIterator<'obo> {
351//     type DataAccess: DataAccess;
352//     type SpectrumAccess: SpectrumAccess<'obo, DataAccess = Self::DataAccess>;
353//     type AccessIterator: Iterator<Item = Self::SpectrumAccess>;
354
355//     fn iter_access(self) -> Self::AccessIterator;
356// }
357
358/// SpectrumAccessIterator provides an iterator over a spectrum
359//#[derive(Clone, Copy)]
360pub struct SpectrumAccessIterator<D: Read + Seek> {
361    data_location: Arc<Mutex<D>>,
362    //mzml: MzML<'obo>,
363    spectra: Vec<Arc<Spectrum>>,
364
365    next_index: usize,
366}
367
368// impl<'obo, D: BufRead + Seek> SpectrumAccessIterator<'obo> for DiskSpectrumAccessIterator<'obo, D> {
369//     type DataAccess = DiskDataAccess<D>;
370//     type SpectrumAccess = DiskSpectrumAccess<'obo, D>;
371//     type AccessIterator = Self;
372
373//     fn iter_access(self) -> <Self as SpectrumAccessIterator<'obo>>::AccessIterator {
374//         self
375//     }
376// }
377
378impl<D: Read + Seek> SpectrumAccessIterator<D> {
379    fn new(data_location: Arc<Mutex<D>>, spectrum_list: &SpectrumList) -> Self {
380        Self {
381            data_location,
382            spectra: spectrum_list.spectra().to_vec(),
383            next_index: 0,
384        }
385    }
386}
387
388impl<D: Read + Seek> Iterator for SpectrumAccessIterator<D> {
389    type Item = SpectrumAccess<D>;
390
391    fn next(&mut self) -> Option<Self::Item> {
392        let cur_index = self.next_index;
393        self.next_index += 1;
394
395        self.spectra
396            .get(cur_index)
397            .map(|spectrum| SpectrumAccess::new(self.data_location.clone(), spectrum.clone()))
398    }
399}
400
401// impl<'a, B: BufRead, D: BufRead + Seek> AsRef<HeaderParser<'a, B>> for Parser<'a, B, D> {
402//     fn as_ref(&self) -> &HeaderParser<'a, B> {
403//         &self.header_parser
404//     }
405// }
406
407// impl<'a, B: BufRead, D: BufRead + Seek> std::ops::Deref for ImzMLParser<'a, B, D> {
408//     type Target = MzMLParser<'a, B>;
409
410//     fn deref(&self) -> &Self::Target {
411//         &self.header_parser
412//     }
413// }
414
415#[cfg(test)]
416mod tests {
417    use crate::imzml::ImzMLReader;
418    use crate::mzml::writer::Writer;
419    use crate::mzml::{DataReader, MzMLReader, MzMLTag};
420
421    use std::fs::File;
422    use std::io::{BufReader, BufWriter};
423    use std::time::Instant;
424
425    #[test]
426    fn test_all_in_test_folder() -> std::io::Result<()> {
427        let paths = std::fs::read_dir("../test/")?;
428
429        //let ontology = crate::obo::parser::parse("src/obo/imagingMS.obo").unwrap();
430
431        for path in paths {
432            let path = path?.path();
433            let extension = path.extension().unwrap();
434            let extension = extension.to_str().unwrap();
435
436            if !(extension == "imzML" || extension == "mzML") {
437                continue;
438            }
439
440            println!("Processing {:?}", path);
441
442            let start = Instant::now();
443
444            match extension {
445                "imzML" => {
446                    let parser = ImzMLReader::from_path(path).unwrap();
447
448                    let duration = start.elapsed();
449                    println!("Time elapsed when parsing imzML is: {:?}", duration);
450
451                    for error in parser.errors() {
452                        println!("{}", error);
453                    }
454                }
455                "mzML" => {
456                    let parser = MzMLReader::from_path(path).unwrap();
457
458                    let duration = start.elapsed();
459                    println!("Time elapsed when parsing imzML is: {:?}", duration);
460
461                    for error in parser.errors() {
462                        println!("{}", error);
463                    }
464                }
465                _ => {}
466            };
467
468            // let parser = MzMLReader::new(header_reader).unwrap();
469            //let mut parser = parser::ImzMLParser::from_file(path.path(), Some(&ontology)).unwrap();
470
471            // let start = Instant::now();
472            // let mut parser = parser::ImzMLParser::from_file(path, Some(&ontology)).unwrap();
473
474            // let duration = start.elapsed();
475            // println!("Time elapsed when parsing (i)mzML is: {:?}", duration);
476
477            // let mut i = 0;
478            // while parser.has_errors() {
479            //     println!("{:?}", parser.pop_error_front().unwrap());
480
481            //     if i > 10 {
482            //         break;
483            //     }
484
485            //     i += 1;
486            // }
487        }
488
489        Ok(())
490    }
491
492    #[test]
493    fn read_mzml_in_test_folder() -> std::io::Result<()> {
494        let paths = std::fs::read_dir("../test/")?;
495
496        //let ontology = crate::obo::parser::parse("src/obo/imagingMS.obo").unwrap();
497
498        for path in paths {
499            let path = path?;
500
501            let path = path.path();
502            let extension = path.extension().unwrap();
503            let extension = extension.to_str().unwrap();
504
505            if extension != "mzML" {
506                continue;
507            }
508
509            println!("Processing [data] {:?}", path);
510
511            let start = Instant::now();
512
513            let parser = DataReader::from_path(path).unwrap();
514            //let mut parser = parser::ImzMLParser::from_file(path.path(), Some(&ontology)).unwrap();
515
516            let duration = start.elapsed();
517            println!("Time elapsed when parsing mzML is: {:?}", duration);
518
519            let spectrum = parser.spectrum(0).unwrap();
520
521            let mzs = spectrum.mz_array().unwrap().as_f64().unwrap();
522
523            println!("{:?}", &mzs[..10]);
524
525            // let start = Instant::now();
526            // let mut parser = parser::ImzMLParser::from_file(path, Some(&ontology)).unwrap();
527
528            // let duration = start.elapsed();
529            // println!("Time elapsed when parsing (i)mzML is: {:?}", duration);
530
531            // let mut i = 0;
532            // while parser.has_errors() {
533            //     println!("{:?}", parser.pop_error_front().unwrap());
534
535            //     if i > 10 {
536            //         break;
537            //     }
538
539            //     i += 1;
540            // }
541        }
542
543        Ok(())
544    }
545
546    #[ignore]
547    #[test]
548    fn test_all_in_folder() -> std::io::Result<()> {
549        let paths = std::fs::read_dir(
550            "/home/alan/Documents/GitProjects/gomsi/metaspace/cmd/metaspacedownloader/output/",
551        )
552        .unwrap();
553
554        //let ontology = crate::obo::parser::parse("imagingMS.obo").unwrap();
555
556        for path in paths {
557            let path = path?;
558
559            if path.path().extension().unwrap() != "imzML" {
560                println!("Skipping {:?}.", path.path());
561                continue;
562            }
563
564            println!("Processing {:?}", path.path());
565
566            let start = Instant::now();
567
568            let file = File::open(path.path()).unwrap();
569            let header_reader = BufReader::new(file);
570
571            let parser = MzMLReader::new(header_reader).unwrap();
572            //let mut parser = parser::ImzMLParser::from_file(path.path(), Some(&ontology)).unwrap();
573
574            let duration = start.elapsed();
575            println!("Time elapsed when parsing imzML is: {:?}", duration);
576
577            for error in parser.errors() {
578                println!("{}", error);
579            }
580        }
581
582        Ok(())
583    }
584
585    #[ignore]
586    #[test]
587    fn test_obo() {
588        let start = Instant::now();
589        let ontology = crate::obo::parser::parse("imagingMS.obo").unwrap();
590        let duration = start.elapsed();
591        println!("Time elapsed when parsing ontology is: {:?}", duration);
592
593        let filename = "/home/alan/Documents/GitProjects/gomsi/metaspace/cmd/metaspacedownloader/output/2017-06-30_07h26m26s_Mousebrain_MG08_2017_GruppeF.imzML";
594        println!("[Parser] Reading with ontology");
595        let parser = ImzMLReader::from_path_with_ontology(filename, ontology.clone()).unwrap();
596
597        let duration = start.elapsed();
598        println!("Time elapsed when parsing imzML is: {:?}", duration);
599
600        for error in parser.errors() {
601            println!("{}", error);
602        }
603
604        let imzml = parser.mzml().unwrap();
605
606        // crate::validation::schema::validate(&imzml);
607
608        let mut errors = Vec::new();
609
610        let mapping = crate::validation::parse("ms-mapping.xml").unwrap();
611        mapping.validate(&ontology, imzml, &mut errors);
612
613        let mapping = crate::validation::parse("Ims1.1-mapping.xml").unwrap();
614        mapping.validate(&ontology, imzml, &mut errors);
615
616        let file = File::create("tmp.xml").unwrap();
617
618        let mut writer = Writer::new(BufWriter::new(file)).unwrap(); //new(Cursor::new(Vec::new()));
619
620        parser.mzml().unwrap().write_xml(&mut writer).unwrap();
621    }
622
623    #[test]
624    #[ignore]
625    fn test_parser() {
626        //let filename = "/home/alan/Documents/GitProjects/gomsi/metaspace/cmd/metaspacedownloader/output/2016-09-22_11h16m17s_Brain01_Bregma-1-46_centroid.imzML";
627        let filename = "/home/alan/Documents/GitProjects/gomsi/metaspace/cmd/metaspacedownloader/output/2017-06-30_07h26m26s_Mousebrain_MG08_2017_GruppeF.imzML";
628        //let filename = "/home/alan/Documents/GitProjects/gomsi/metaspace/cmd/metaspacedownloader/output/2019-03-28_09h18m57s_brain 25µm raw - root mean square - metaspace.imzML";
629        //let filename = "/home/alan/Documents/GitProjects/gomsi/metaspace/cmd/metaspacedownloader/output/2021-01-16_08h47m30s_peptide.imzML";
630        //let filename = "/home/alan/Documents/GitProjects/gomsi/metaspace/cmd/metaspacedownloader/output/2016-09-21_16h06m53s_Image5.imzML";
631
632        let start = Instant::now();
633        let parser = ImzMLReader::from_path(filename).unwrap();
634        let duration = start.elapsed();
635
636        for error in parser.errors() {
637            println!("{}", error);
638        }
639
640        //println!("width: {:?}", parser.current_mzml.unwrap().get_width());
641
642        let file = File::create("tmp.xml").unwrap();
643
644        let mut writer = Writer::new(BufWriter::new(file)).unwrap(); //new(Cursor::new(Vec::new()));
645
646        parser.mzml().unwrap().write_xml(&mut writer).unwrap();
647
648        println!("Time elapsed when parsing Mousebrain is: {:?}", duration);
649
650        println!("{:?}", parser.mzml().unwrap().spectrum(0));
651        println!(
652            "{:?}",
653            parser.mzml().unwrap().spectrum(0).unwrap().x_position()
654        );
655        println!(
656            "{:?}",
657            parser
658                .mzml()
659                .unwrap()
660                .referenceable_param_group_ref("mzArray")
661        );
662    }
663}