imzml/mzml/
binarydataarray.rs

1use flate2::bufread::ZlibDecoder;
2use quick_xml::events::{BytesStart, Event};
3use std::collections::VecDeque;
4use std::io::{BufRead, Cursor, Read, Write};
5
6use super::cvparam::{search_all_params, CVParam, HasCVParams, HasParamGroupRefs};
7use super::writer::Writer;
8use super::{MzMLReader, MzMLTag, Tag};
9use crate::error::{FatalParseError, ParseError};
10use crate::mzml::attributes::{AttributeValue, LIST_ATTRIBUTES};
11use crate::mzml::cvparam::{CVParamValue, RawTerm, UserParam};
12
13use super::referenceableparamgroup::ReferenceableParamGroupRef;
14
/// OBO accession identifying an m/z array
pub const ACCESSION_MZ_ARRAY: &str = "MS:1000514";
/// OBO accession identifying an intensity array
pub const ACCESSION_INTENSITY_ARRAY: &str = "MS:1000515";

/// OBO accession for the binary data type term
pub const ACCESSION_BINARY_DATA_TYPE: &str = "MS:1000518";
/// OBO accession for 64-bit float storage
pub const ACCESSION_64_BIT_FLOAT: &str = "MS:1000523";
/// OBO accession for 32-bit float storage
pub const ACCESSION_32_BIT_FLOAT: &str = "MS:1000521";

/// OBO accession for compression (parent term)
pub const ACCESSION_COMPRESSION: &str = "MS:1000572";
/// OBO accession for "no compression"
pub const ACCESSION_NO_COMPRESSION: &str = "MS:1000576";
/// OBO accession for Zlib compression
pub const ACCESSION_ZLIB_COMPRESSION: &str = "MS:1000574";

// Each `B_…` constant below is the byte-slice form of the public constant
// directly above it, derived from that constant so the two cannot drift apart.

/// OBO accession for external array length
pub const ACCESSION_EXTERNAL_ARRAY_LENGTH: &str = "IMS:1000103";
const B_ACCESSION_EXTERNAL_ARRAY_LENGTH: &[u8] = ACCESSION_EXTERNAL_ARRAY_LENGTH.as_bytes();

/// OBO accession for external encoded length
pub const ACCESSION_EXTERNAL_ENCODED_LENGTH: &str = "IMS:1000104";
const B_ACCESSION_EXTERNAL_ENCODED_LENGTH: &[u8] = ACCESSION_EXTERNAL_ENCODED_LENGTH.as_bytes();

/// OBO accession for external offset
pub const ACCESSION_EXTERNAL_OFFSET: &str = "IMS:1000102";
const B_ACCESSION_EXTERNAL_OFFSET: &[u8] = ACCESSION_EXTERNAL_OFFSET.as_bytes();

/// OBO accession for external data
pub const ACCESSION_EXTERNAL_DATA: &str = "IMS:1000101";
const B_ACCESSION_EXTERNAL_DATA: &[u8] = ACCESSION_EXTERNAL_DATA.as_bytes();
49
/// The type of data being stored (e.g. m/z values, intensities)
///
/// The type is a plain tag with no payload, so it is `Copy` and can be used
/// as a hash-map key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum BinaryDataArrayType {
    /// m/z array
    MZArray,
    /// Intensity array
    ///
    /// NOTE: the variant name is misspelled; it is kept as-is because it is
    /// part of the public API.
    IntenistyArray,
}
58
/// The type used to store binary data (e.g. m/zs or intensities)
///
/// Comparable and hashable so callers can test the storage type directly
/// (e.g. `binary_type == BinaryDataType::Float32`) instead of pattern matching.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum BinaryDataType {
    /// Another format not specified here
    Undefined,
    /// f64
    Float64,
    /// f32
    Float32,
}
69
70/// <binaryDataArrayList> tag representation. Contains a list of BinaryDataArray
71pub struct BinaryDataArrayList {
72    // TODO: Currently need this to avoid breaking code with the old bda_list: Vec<BinaryDataArray> parameter on MzML struct
73    pub(crate) list: Vec<BinaryDataArray>,
74}
75
76impl BinaryDataArrayList {
77    fn new(count: usize) -> Self {
78        BinaryDataArrayList {
79            list: Vec::with_capacity(count),
80        }
81    }
82}
83
84impl MzMLTag for BinaryDataArrayList {
85    fn parse_start_tag<B: BufRead>(
86        parser: &mut MzMLReader<B>,
87        start_event: &BytesStart,
88    ) -> Result<Option<Self>, FatalParseError>
89    where
90        Self: std::marker::Sized,
91    {
92        if start_event.name().local_name().as_ref() != b"binaryDataArrayList" {
93            Err(FatalParseError::UnexpectedTag(format!(
94                "Unexpected event {:?} when processing BinaryDataArrayList",
95                start_event,
96            )))
97        } else {
98            let attributes = parser.process_attributes(
99                Tag::BinaryDataArrayList,
100                &LIST_ATTRIBUTES,
101                start_event,
102            )?;
103
104            let count = match attributes.get("count") {
105                Some(&AttributeValue::Integer(count)) => count as usize,
106                _ => 0,
107            };
108
109            parser
110                .breadcrumbs
111                .push_back((Tag::BinaryDataArrayList, None));
112
113            Ok(Some(BinaryDataArrayList::new(count)))
114        }
115    }
116
117    fn parse_xml<B: BufRead>(
118        &mut self,
119        parser: &mut MzMLReader<B>,
120        buffer: &mut Vec<u8>,
121    ) -> Result<(), FatalParseError> {
122        let mut last_num_params = 0;
123
124        // Check what comes next
125        loop {
126            // Clear the buffer ready for the next tag
127            buffer.clear();
128
129            let next_event = parser.next(buffer)?;
130
131            match next_event {
132                Event::Start(start_event) | Event::Empty(start_event) => {
133                    match start_event.name().as_ref() {
134                        b"binaryDataArray" => {
135                            if let Some(mut bda) =
136                                BinaryDataArray::parse_start_tag(parser, &start_event)?
137                            {
138                                bda.cv_params.reserve(last_num_params);
139                                bda.parse_xml(parser, buffer)?;
140
141                                last_num_params = bda.cv_params.len();
142
143                                self.list.push(bda);
144                            }
145                        }
146                        _ => parser.errors.push_back(ParseError::UnexpectedTag(format!(
147                            "{:?} unexpected when processing {:?}",
148                            std::str::from_utf8(start_event.name().as_ref()),
149                            Tag::BinaryDataArrayList
150                        ))),
151                    }
152                }
153                Event::End(end_event) => {
154                    if let b"binaryDataArrayList" = end_event.name().as_ref() {
155                        parser.breadcrumbs.pop_back();
156
157                        break;
158                    }
159                }
160                Event::Eof => {
161                    return Err(FatalParseError::MissingClosingTag(
162                        "binaryDataArrayList".to_string(),
163                    ));
164                }
165                _ => {}
166            }
167        }
168
169        Ok(())
170    }
171
172    fn tag() -> Tag {
173        Tag::BinaryDataArrayList
174    }
175
176    fn write_xml<W: Write>(&self, writer: &mut Writer<W>) -> Result<(), quick_xml::Error> {
177        writer.write_list("binaryDataArrayList", &self.list)
178    }
179}
180
/// Possible means of compressing binary data stored in (i)mzML
///
/// The type is a payload-free tag, so it is `Copy` and comparable
/// (e.g. `compression == Compression::Zlib`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Compression {
    /// No compression
    None,
    /// Compression has not been defined in the metadata
    Undefined,
    /// Z-lib compression
    Zlib,
}
191
/// Provides a single interface to compressed data, decompressing the data on `read()` if necessary
pub struct CompressionReader {
    // Type-erased so that plain and decompressing sources share one type
    reader: Box<dyn Read>,
}

// `Box<dyn Read>` has no `Debug` impl, so this cannot be derived; the inner
// reader is shown as opaque.
impl std::fmt::Debug for CompressionReader {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("CompressionReader { .. }")
    }
}

impl Read for CompressionReader {
    /// Forward the read to the wrapped reader.
    #[inline]
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        self.reader.read(buf)
    }
}
203
204impl Compression {
205    /// Create a `CompressionReader` for the supplied data
206    pub fn to_reader(&self, data: Vec<u8>) -> CompressionReader {
207        match self {
208            Compression::None | Compression::Undefined => CompressionReader {
209                reader: Box::new(Cursor::new(data)),
210            },
211            Compression::Zlib => CompressionReader {
212                reader: Box::new(ZlibDecoder::new(Cursor::new(data))),
213            },
214        }
215    }
216}
217
218/// BinaryDataArray represents the <binaryDataArray> tag and captures information required to read and parse the stored binary data.
219#[derive(Debug)]
220pub struct BinaryDataArray {
221    param_group_refs: Vec<ReferenceableParamGroupRef>,
222    cv_params: Vec<CVParam>,
223    user_params: Vec<UserParam>,
224
225    is_external: bool,
226
227    array_length: Option<u64>,
228    encoded_length: Option<u64>,
229    offset: Option<u64>,
230}
231
232impl Clone for BinaryDataArray {
233    fn clone(&self) -> Self {
234        Self {
235            param_group_refs: self.param_group_refs.clone(),
236            cv_params: self.cv_params.clone(),
237            user_params: self.user_params.clone(),
238            is_external: self.is_external,
239            array_length: self.array_length,
240            encoded_length: self.encoded_length,
241            offset: self.offset,
242        }
243    }
244}
245
246impl Default for BinaryDataArray {
247    fn default() -> Self {
248        Self::new()
249    }
250}
251
252impl BinaryDataArray {
253    /// Create a new `BinaryDataArray` with no CV params
254    pub fn new() -> Self {
255        BinaryDataArray {
256            param_group_refs: Vec::new(),
257            cv_params: Vec::new(),
258            user_params: Vec::new(),
259
260            is_external: true,
261
262            array_length: None,
263            encoded_length: None,
264            offset: None,
265        }
266    }
267
268    pub(crate) fn set_external_params(
269        &mut self,
270        offset: u64,
271        array_length: u64,
272        encoded_length: u64,
273    ) {
274        self.is_external = true;
275        self.offset = Some(offset);
276        self.array_length = Some(array_length);
277        self.encoded_length = Some(encoded_length);
278    }
279
280    /// Return the `BinaryDataArrayType` (e.g. m/z array, intensity array) based on the CV params stored within.
281    pub fn array_type(&self) -> Option<BinaryDataArrayType> {
282        if let Some(_cv_param) = search_all_params(self, ACCESSION_MZ_ARRAY) {
283            return Some(BinaryDataArrayType::MZArray);
284        }
285        if let Some(_cv_param) = search_all_params(self, ACCESSION_INTENSITY_ARRAY) {
286            return Some(BinaryDataArrayType::IntenistyArray);
287        }
288        None
289    }
290
291    /// Return the `BinaryDataType` (e.g. f64, f32, ...) based on the CV params stored within.
292    pub fn binary_type(&self) -> BinaryDataType {
293        if let Some(_cv_param) = search_all_params(self, ACCESSION_64_BIT_FLOAT) {
294            return BinaryDataType::Float64;
295        }
296        if let Some(_cv_param) = search_all_params(self, ACCESSION_32_BIT_FLOAT) {
297            return BinaryDataType::Float32;
298        }
299
300        BinaryDataType::Undefined
301    }
302
303    /// Return the `Compression` (e.g. None, Z-lib, ...) used to compress the binary data, based on the CV params stored within.
304    pub fn compression(&self) -> Compression {
305        if let Some(_cv_param) = search_all_params(self, ACCESSION_NO_COMPRESSION) {
306            return Compression::None;
307        }
308        if let Some(_cv_param) = search_all_params(self, ACCESSION_ZLIB_COMPRESSION) {
309            return Compression::Zlib;
310        }
311
312        Compression::Undefined
313    }
314
315    /// Return the number of elements in the array.
316    pub fn array_length(&self) -> Option<u64> {
317        self.array_length
318    }
319
320    /// Return the number of bytes needed to store the array (in the specified data type, and after compression).
321    pub fn encoded_length(&self) -> Option<u64> {
322        self.encoded_length
323    }
324
325    /// Return the file offset depicting where the array is stored.
326    pub fn offset(&self) -> Option<u64> {
327        self.offset
328    }
329
330    /// Return true if the array is stored in an external file (e.g. ibd). Always true for imzML files.
331    pub fn is_data_external(&self) -> bool {
332        self.is_external
333    }
334
335    pub(crate) fn add_raw_term<'a>(
336        &mut self,
337        raw_term: &RawTerm<'a>,
338        breadcrumbs: &VecDeque<(Tag, Option<String>)>,
339        errors: &mut VecDeque<ParseError>,
340        ignore_uncommon_tags: bool,
341    ) {
342        match raw_term.raw_accession() {
343            B_ACCESSION_EXTERNAL_ARRAY_LENGTH => {
344                self.array_length = Some(raw_term.value_as_u64());
345            }
346            B_ACCESSION_EXTERNAL_ENCODED_LENGTH => {
347                self.encoded_length = Some(raw_term.value_as_u64());
348            }
349            B_ACCESSION_EXTERNAL_OFFSET => {
350                self.offset = Some(raw_term.value_as_u64());
351            }
352            B_ACCESSION_EXTERNAL_DATA => {}
353            _ => {
354                //let cv_param: Result<CVParam<'a>, crate::ParseError> = raw_term.into();
355
356                if !ignore_uncommon_tags {
357                    self.cv_params
358                        .push(raw_term.to_cv_param(breadcrumbs, errors))
359                }
360            }
361        }
362    }
363}
364
365impl MzMLTag for BinaryDataArray {
366    fn parse_start_tag<B: BufRead>(
367        parser: &mut MzMLReader<B>,
368        start_event: &BytesStart,
369    ) -> Result<Option<Self>, FatalParseError>
370    where
371        Self: std::marker::Sized,
372    {
373        if start_event.name().local_name().as_ref() != b"binaryDataArray" {
374            Err(FatalParseError::UnexpectedTag(format!(
375                "Unexpected event {:?} when processing BinaryDataArray",
376                start_event,
377            )))
378        } else {
379            // TODO: encodedLength
380
381            parser.breadcrumbs.push_back((Tag::BinaryDataArray, None));
382
383            Ok(Some(BinaryDataArray::new()))
384        }
385    }
386
387    fn parse_xml<B: BufRead>(
388        &mut self,
389        parser: &mut MzMLReader<B>,
390        buffer: &mut Vec<u8>,
391    ) -> Result<(), FatalParseError> {
392        // Check what comes next
393        loop {
394            // Clear the buffer ready for the next tag
395            buffer.clear();
396
397            let next_event = parser.next(buffer)?;
398            let is_empty = matches!(next_event, Event::Empty(_));
399
400            match next_event {
401                Event::Start(start_event) | Event::Empty(start_event) => {
402                    match start_event.name().as_ref() {
403                        b"cvParam" => {
404                            let raw_term = RawTerm::parse_start_tag(parser, &start_event)?;
405                            self.add_raw_term(
406                                &raw_term,
407                                &parser.breadcrumbs,
408                                &mut parser.errors,
409                                parser.ignore_uncommon_tags,
410                            );
411                        }
412                        b"referenceableParamGroupRef" => {
413                            let param_group_ref =
414                                ReferenceableParamGroupRef::parse_start_tag(parser, &start_event)?;
415                            self.param_group_refs.push(param_group_ref);
416                        }
417                        b"binary" => {
418                            if !is_empty && self.offset.is_none() {
419                                self.is_external = false;
420
421                                self.offset = Some(parser.reader.buffer_position() as u64);
422                            }
423                        }
424                        _ => parser.errors.push_back(ParseError::UnexpectedTag(format!(
425                            "{:?} unexpected when processing {:?}",
426                            std::str::from_utf8(start_event.name().as_ref()),
427                            Tag::BinaryDataArray
428                        ))),
429                    }
430                }
431                Event::Text(_) => {
432                    if !self.is_external {
433                        self.encoded_length =
434                            Some(parser.reader.buffer_position() as u64 - self.offset.unwrap());
435                    }
436                }
437                Event::End(end_event) => {
438                    if let b"binaryDataArray" = end_event.name().as_ref() {
439                        // Finished with gathering information, so now we can generate the DataAccess
440                        // self.data_access = Some(DataAccess::OnDisk(OnDiskAccess {
441                        //     file: parser.data_location.clone(),
442                        //     encoded_length: self.encoded_length.unwrap(),
443                        //     offset: self.offset.unwrap(),
444                        // }));
445
446                        parser.breadcrumbs.pop_back();
447
448                        break;
449                    }
450                }
451                Event::Eof => {
452                    return Err(FatalParseError::MissingClosingTag(
453                        "binaryDataArray".to_string(),
454                    ));
455                }
456                _ => {}
457            }
458        }
459
460        Ok(())
461    }
462
463    fn tag() -> Tag {
464        Tag::BinaryDataArray
465    }
466
467    fn write_xml<W: Write>(&self, writer: &mut Writer<W>) -> Result<(), quick_xml::Error> {
468        writer.start_tag_with_attr(
469            "binaryDataArray",
470            "encodedLength",
471            self.encoded_length.unwrap_or(0),
472        )?;
473
474        self.write_ref_param_groups_xml(writer)?;
475
476        if let Some(array_length) = self.array_length {
477            writer.write_param(
478                ACCESSION_EXTERNAL_ARRAY_LENGTH,
479                CVParamValue::NonNegativeInteger(array_length),
480            )?;
481        }
482
483        if let Some(encoded_length) = self.encoded_length {
484            writer.write_param(
485                ACCESSION_EXTERNAL_ENCODED_LENGTH,
486                CVParamValue::NonNegativeInteger(encoded_length),
487            )?;
488        }
489
490        if let Some(offset) = self.offset {
491            writer.write_param(
492                ACCESSION_EXTERNAL_OFFSET,
493                CVParamValue::NonNegativeInteger(offset),
494            )?;
495        }
496
497        // Include the parameter stating that the data is external
498        // TODO: Check it doesn't exist already?
499        writer.write_param(ACCESSION_EXTERNAL_DATA, CVParamValue::Empty)?;
500
501        self.write_params_xml(writer)?;
502
503        writer.empty_tag("binary")?;
504        writer.end_tag("binaryDataArray")
505    }
506}
507
508// struct BinaryDataArrayCVParamIterator<'obo, 'a> {
509//     bda: &'a BinaryDataArray<'obo>,
510//     extra: Vec<CVParam<'obo>>,
511// }
512
513// impl<'obo, 'a> BinaryDataArrayCVParamIterator<'obo, 'a> {
514//     fn new(bda: &'obo BinaryDataArray) -> Self {
515//         Self {
516//             bda,
517//             extra: Vec::new(),
518//         }
519//     }
520// }
521
522// impl<'obo, 'a> Iterator for BinaryDataArrayCVParamIterator<'obo, 'a> {
523//     type Item = CVParam<'a>;
524
525//     fn next(&mut self) -> Option<Self::Item> {
526//         Some(self.extra.get(0).unwrap().clone())
527//     }
528// }
529
530//impl<'a> WriteCVParams<'a> for BinaryDataArray<'a> {}
531impl HasCVParams for BinaryDataArray {
532    fn add_cv_param(&mut self, param: CVParam) {
533        match param.accession() {
534            ACCESSION_EXTERNAL_ARRAY_LENGTH => {
535                self.array_length = Some(param.value_as_u64().unwrap());
536            }
537            ACCESSION_EXTERNAL_ENCODED_LENGTH => {
538                self.encoded_length = Some(param.value_as_u64().unwrap());
539            }
540            ACCESSION_EXTERNAL_OFFSET => {
541                self.offset = Some(param.value_as_u64().unwrap());
542            }
543            ACCESSION_EXTERNAL_DATA => {}
544            /*ACCESSION_EXTERNAL_OFFSET => match param.get_value().as_ref().unwrap().parse() {
545                Ok(value) => self.offset = Some(value),
546                Err(_err) => self.cv_params.push(param),
547            },*/
548            _ => {
549                self.cv_params.push(param);
550            }
551        }
552    }
553
554    fn cv_params(&self) -> &Vec<CVParam> {
555        &self.cv_params
556    }
557
558    fn cv_params_mut(&mut self) -> &mut Vec<CVParam> {
559        self.cv_params.as_mut()
560    }
561
562    fn add_user_param(&mut self, param: UserParam) {
563        self.user_params.push(param);
564    }
565
566    fn user_params(&self) -> &Vec<UserParam> {
567        &self.user_params
568    }
569
570    // fn cv_param_iter(&self) -> CVParamIterator {
571    //     let params = self.clone_params_with_groups();
572
573    //     // Add in
574
575    //     CVParamIterator::from_cv_params(params)
576    // }
577}
578
579impl HasParamGroupRefs for BinaryDataArray {
580    fn add_param_group_ref(&mut self, param_group_ref: ReferenceableParamGroupRef) {
581        self.param_group_refs.push(param_group_ref);
582    }
583
584    fn param_group_refs(&self) -> &Vec<ReferenceableParamGroupRef> {
585        &self.param_group_refs
586    }
587}
588
589// impl<'a> ImzMLParser<'a> {
590//     pub(super) fn start_binary_data_array_list(
591//         &mut self,
592//         e: &BytesStart,
593//     ) -> Result<Tag, FatalParseError> {
594//         let attributes = self.process_attributes(Tag::BinaryDataArrayList, &LIST_ATTRIBUTES, e);
595
596//         if let Some(&AttributeValue::Integer(count)) = attributes.get("count") {
597//             if let Some(current_spectrum) = self.current_spectrum.as_mut() {
598//                 current_spectrum.binary_data_array_list = Vec::with_capacity(count as usize);
599//             } else if let Some(current_chromatogram) = self.current_chromatogram.as_mut() {
600//                 current_chromatogram.binary_data_array_list = Vec::with_capacity(count as usize);
601//             }
602//         }
603
604//         /*let mut has_error: bool = false;
605
606//         for att in e.attributes() {
607//             match att {
608//                 Ok(attribute) => match attribute.key {
609//                     b"count" => match parse_usize(attribute) {
610//                         Ok(value) => {
611//                             self.current_spectrum
612//                                 .as_mut()
613//                                 .unwrap()
614//                                 .binary_data_array_list = Vec::with_capacity(value);
615//                         }
616//                         Err(error) => {
617//                             self.errors
618//                                 .push_back(ParseError::IntError((Tag::BinaryDataArrayList, error)));
619//                             has_error = true;
620//                             break;
621//                         }
622//                     },
623//                     _ => {
624//                         self.errors.push_back(ParseError::UnexpectedAttribute((
625//                             Tag::BinaryDataArrayList,
626//                             std::str::from_utf8(attribute.key).unwrap().to_string(),
627//                         )));
628//                         has_error = true;
629//                         break;
630//                     }
631//                 },
632//                 Err(error) => {
633//                     self.errors
634//                         .push_back(ParseError::XMLError((Tag::BinaryDataArrayList, error)));
635//                     has_error = true;
636//                     break;
637//                 }
638//             };
639//         }*/
640//         /*if has_error {
641//             ParserState::Error
642//         } else {
643//             ParserState::Processing
644//         }*/
645//         Ok(Tag::BinaryDataArrayList)
646//     }
647
648//     pub(super) fn end_binary_data_array(&mut self) -> ParserState {
649//         match self.current_binary_data_array.take() {
650//             Some(binary_data_array) => {
651//                 if let Some(current_spectrum) = self.current_spectrum.as_mut() {
652//                     current_spectrum.add_binary_data_array(binary_data_array);
653//                 } else if let Some(current_chromatogram) = self.current_chromatogram.as_mut() {
654//                     current_chromatogram.add_binary_data_array(binary_data_array);
655//                 } else {
656//                     // TODO: ERROR neither current_spectrum nor current_chromatogram set
657//                 }
658//             }
659//             None => {
660//                 // TODO: Error here?
661//                 self.errors.push_back(ParseError::UnexpectedTag(
662//                     "Found </binaryDataArray> but no <binaryDataArray>".to_string(),
663//                 ))
664//             }
665//         };
666
667//         ParserState::Processing
668//     }
669// }