use std::{
convert::TryFrom,
fmt::Display,
fs,
io::{self, prelude::*, BufReader},
path,
};
use flate2::bufread::GzDecoder;
use crate::{
io::compression::{is_gzipped, is_gzipped_extension},
meta::FormatConversion,
params::ControlledVocabulary,
Param,
};
#[cfg(feature = "mgf")]
use crate::io::mgf::is_mgf;
#[cfg(feature = "mzml")]
use crate::io::mzml::is_mzml;
#[cfg(feature = "thermo")]
use crate::io::thermo::is_thermo_raw_prefix;
#[cfg(feature = "imzml")]
use crate::io::imzml::is_imzml;
#[cfg(feature = "bruker_tdf")]
use crate::io::tdf::is_tdf;
/// The mass spectrometry data file formats that the inference functions in this
/// module can report.
///
/// The enum is `#[non_exhaustive]`, so more variants may be added later and
/// downstream `match` expressions need a wildcard arm.
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum MassSpectrometryFormat {
/// Mascot MGF format.
MGF,
/// mzML format.
MzML,
/// mzMLb format.
MzMLb,
/// Thermo RAW format.
ThermoRaw,
/// Bruker TDF format. `infer_from_path` only reports this for directories,
/// and only when the `bruker_tdf` feature is enabled.
BrukerTDF,
/// imzML format.
IMzML,
/// The format could not be determined. This variant has no controlled
/// vocabulary term and no associated format conversion.
Unknown,
}
impl MassSpectrometryFormat {
    /// Get the [`FormatConversion`] that describes converting data *into* this
    /// format.
    ///
    /// Only [`MassSpectrometryFormat::MzML`] and [`MassSpectrometryFormat::MzMLb`]
    /// have a matching conversion; every other variant yields `None`.
    pub fn as_conversion(&self) -> Option<FormatConversion> {
        let conversion = match self {
            Self::MzML => FormatConversion::ConversionToMzML,
            Self::MzMLb => FormatConversion::ConversionToMzMLb,
            _ => return None,
        };
        Some(conversion)
    }

    /// Get the `MS` controlled vocabulary term naming this file format as a
    /// [`Param`].
    ///
    /// Returns `None` for [`MassSpectrometryFormat::Unknown`], which has no term.
    pub fn as_param(&self) -> Option<Param> {
        // (term name, accession number) within the MS controlled vocabulary.
        let (name, accession) = match self {
            Self::MGF => ("Mascot MGF format", 1001062),
            Self::MzML => ("mzML format", 1000584),
            Self::MzMLb => ("mzMLb format", 1002838),
            Self::ThermoRaw => ("Thermo RAW format", 1000563),
            Self::IMzML => ("imzML format", 1003577),
            Self::BrukerTDF => ("Bruker TDF format", 1002817),
            Self::Unknown => return None,
        };
        let term = ControlledVocabulary::MS.const_param_ident(name, accession);
        Some(term.into())
    }
}
impl TryFrom<MassSpectrometryFormat> for Param {
    type Error = &'static str;

    /// Convert the format into its controlled vocabulary [`Param`].
    ///
    /// # Errors
    /// Returns `"No conversion"` when [`MassSpectrometryFormat::as_param`]
    /// yields `None`.
    fn try_from(value: MassSpectrometryFormat) -> Result<Self, Self::Error> {
        value.as_param().ok_or("No conversion")
    }
}
impl TryFrom<MassSpectrometryFormat> for FormatConversion {
    type Error = &'static str;

    /// Convert the format into the [`FormatConversion`] that targets it.
    ///
    /// # Errors
    /// Returns `"No conversion"` when [`MassSpectrometryFormat::as_conversion`]
    /// yields `None`.
    fn try_from(value: MassSpectrometryFormat) -> Result<Self, Self::Error> {
        value.as_conversion().ok_or("No conversion")
    }
}
impl Display for MassSpectrometryFormat {
    /// Write the variant using its derived `Debug` representation.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_fmt(format_args!("{:?}", self))
    }
}
/// Guess the file format from a path alone, without reading file contents.
///
/// Returns the inferred [`MassSpectrometryFormat`] together with a flag telling
/// whether the path carries a gzip extension (as reported by
/// `is_gzipped_extension`).
///
/// * Directories are never matched by extension. With the `bruker_tdf` feature
///   enabled a directory is tested with `is_tdf`; any other directory is
///   reported as [`MassSpectrometryFormat::Unknown`]. The gzip flag is always
///   `false` for directories.
/// * For everything else the extension (after `is_gzipped_extension` has
///   processed the path) is compared case-insensitively against the extensions
///   of the formats whose features are enabled.
/// * A missing, non-UTF-8, or unrecognized extension yields
///   [`MassSpectrometryFormat::Unknown`].
pub fn infer_from_path<P: Into<path::PathBuf>>(path: P) -> (MassSpectrometryFormat, bool) {
    let path: path::PathBuf = path.into();

    if path.is_dir() {
        #[cfg(feature = "bruker_tdf")]
        if is_tdf(path) {
            return (MassSpectrometryFormat::BrukerTDF, false);
        }
        // None of the extension-identified formats are directories, so stop
        // here in every feature configuration instead of letting a directory
        // such as `sample.raw/` fall through to the extension table below.
        return (MassSpectrometryFormat::Unknown, false);
    }

    let (is_gzipped, path) = is_gzipped_extension(path);

    // ASCII lower-casing never changes UTF-8 validity, so converting to `&str`
    // first and lower-casing afterwards accepts exactly the same extensions.
    let extension = path
        .extension()
        .and_then(|ext| ext.to_str())
        .map(str::to_ascii_lowercase);

    let form = match extension.as_deref() {
        #[cfg(feature = "mzml")]
        Some("mzml") => MassSpectrometryFormat::MzML,
        #[cfg(feature = "mgf")]
        Some("mgf") => MassSpectrometryFormat::MGF,
        #[cfg(feature = "mzmlb")]
        Some("mzmlb") => MassSpectrometryFormat::MzMLb,
        #[cfg(feature = "thermo")]
        Some("raw") => MassSpectrometryFormat::ThermoRaw,
        #[cfg(feature = "imzml")]
        Some("imzml") => MassSpectrometryFormat::IMzML,
        _ => MassSpectrometryFormat::Unknown,
    };
    (form, is_gzipped)
}
/// Decompress at most `limit` bytes from the beginning of the gzip data in
/// `compressed`.
///
/// `compressed` is usually only a prefix of a longer gzip stream. Running out
/// of input before `limit` bytes were produced is therefore expected: the
/// decoder signalling end-of-data (`Ok(0)`) or `UnexpectedEof` simply ends the
/// loop and the bytes decoded so far are returned. Any other decoder error is
/// propagated.
fn decompress_gzip_prefix(compressed: &[u8], limit: usize) -> io::Result<Vec<u8>> {
    let mut decoder = GzDecoder::new(compressed);
    let mut decompressed = vec![0u8; limit];
    let mut filled = 0;
    while filled < limit {
        match decoder.read(&mut decompressed[filled..]) {
            Ok(0) => break,
            Ok(n) => filled += n,
            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
            // The compressed prefix was cut off mid-stream; keep what we have.
            Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => break,
            Err(e) => return Err(e),
        }
    }
    decompressed.truncate(filled);
    Ok(decompressed)
}

/// Guess the file format by inspecting the first bytes of `stream`.
///
/// Up to 500 bytes are read starting at the current position. If they begin a
/// gzip stream (per `is_gzipped`), the prefix is decompressed and the
/// decompressed bytes are inspected instead. The stream is always seeked back
/// to the position it had on entry before the result, or a read or
/// decompression error, is returned.
///
/// Returns the inferred [`MassSpectrometryFormat`] and whether the content was
/// gzip-compressed. Content that matches no enabled format is reported as
/// [`MassSpectrometryFormat::Unknown`].
///
/// # Errors
/// Returns any I/O error from querying or restoring the stream position, from
/// reading the prefix, or from the gzip decoder (other than running out of
/// compressed input, which is tolerated).
pub fn infer_from_stream<R: Read + Seek>(
    stream: &mut R,
) -> io::Result<(MassSpectrometryFormat, bool)> {
    /// Number of leading bytes inspected.
    const PREFIX_LEN: usize = 500;

    let current_pos = stream.stream_position()?;

    // `take` + `read_to_end` keeps reading through short reads and leaves `buf`
    // holding exactly the bytes that were read, with no zero padding.
    let mut buf = Vec::with_capacity(PREFIX_LEN);
    let read_result = stream
        .by_ref()
        .take(PREFIX_LEN as u64) // lossless: 500 fits in u64
        .read_to_end(&mut buf);

    // Rewind before propagating anything so the caller's position is restored
    // on the error paths as well.
    stream.seek(io::SeekFrom::Start(current_pos))?;
    read_result?;

    let is_stream_gzipped = is_gzipped(buf.as_slice());
    if is_stream_gzipped {
        // Ask for as many decompressed bytes as compressed bytes were read,
        // but accept fewer when the prefix does not contain that much data.
        buf = decompress_gzip_prefix(&buf, buf.len())?;
    }

    // imzML is tested before mzML; the order of these checks is significant.
    #[cfg(feature = "imzml")]
    if is_imzml(&buf) {
        return Ok((MassSpectrometryFormat::IMzML, is_stream_gzipped));
    }
    #[cfg(feature = "mzml")]
    if is_mzml(&buf) {
        return Ok((MassSpectrometryFormat::MzML, is_stream_gzipped));
    }
    #[cfg(feature = "mgf")]
    if is_mgf(&buf) {
        return Ok((MassSpectrometryFormat::MGF, is_stream_gzipped));
    }
    #[cfg(feature = "thermo")]
    if is_thermo_raw_prefix(&buf) {
        return Ok((MassSpectrometryFormat::ThermoRaw, is_stream_gzipped));
    }
    Ok((MassSpectrometryFormat::Unknown, is_stream_gzipped))
}
/// Infer the format of the file at `path`, first from the path itself and, if
/// that is inconclusive, from the file's leading bytes.
///
/// Returns the inferred [`MassSpectrometryFormat`] and whether the file is
/// gzip-compressed. When `infer_from_path` cannot identify the format and the
/// path is a directory, `(Unknown, false)` is returned without opening
/// anything; otherwise the file is opened and handed to `infer_from_stream`.
///
/// # Errors
/// Returns an I/O error if the file cannot be opened or if
/// `infer_from_stream` fails.
pub fn infer_format<P: Into<path::PathBuf>>(path: P) -> io::Result<(MassSpectrometryFormat, bool)> {
    let path: path::PathBuf = path.into();

    let (format, is_gzipped) = infer_from_path(&path);
    log::debug!("Inferred format from path: {:?} (gzip: {})", format, is_gzipped);

    // The path was conclusive; no need to touch the file.
    if !matches!(format, MassSpectrometryFormat::Unknown) {
        return Ok((format, is_gzipped));
    }
    // A directory has no byte stream to sniff.
    if path.is_dir() {
        return Ok((MassSpectrometryFormat::Unknown, false));
    }

    // Borrow the path: `File::open` only needs `AsRef<Path>`, so no clone.
    let handle = fs::File::open(&path)?;
    let mut stream = BufReader::new(handle);
    let (format, is_gzipped) = infer_from_stream(&mut stream)?;
    log::debug!(
        "Inferred format from stream: {:?} (gzip: {})",
        format,
        is_gzipped
    );
    Ok((format, is_gzipped))
}