#![forbid(unsafe_code)]
use std::path::{Path, PathBuf};
mod error;
pub use error::{Error, Result};
pub use openproteo_core as core;
#[cfg(feature = "arrow")]
pub use openproteo_core::arrow as arrow;
/// Re-exports of the vendor reader crates used by this crate.
///
/// Each re-export is compiled in only when the matching Cargo feature
/// (`thermo`, `bruker`, `waters`) is enabled; with a feature disabled the
/// corresponding name simply does not exist in this module.
pub mod vendor {
// Crate used for the `VendorFormat::ThermoRaw` code paths.
#[cfg(feature = "thermo")]
pub use opentfraw;
// Crate used for the `VendorFormat::BrukerTdf` code paths.
#[cfg(feature = "bruker")]
pub use opentimstdf;
// Crate used for the `VendorFormat::WatersRaw` code paths.
#[cfg(feature = "waters")]
pub use openwraw;
}
/// Vendor raw-data formats that [`detect_format`] can recognise.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum VendorFormat {
    /// Single-file Thermo RAW (identified by its "Finnigan" header signature).
    ThermoRaw,
    /// Bruker directory containing `analysis.tdf` and `analysis.tdf_bin`.
    BrukerTdf,
    /// Waters directory containing `_HEADER.TXT`.
    WatersRaw,
}

impl VendorFormat {
    /// Returns the short lowercase vendor identifier for this format
    /// (`"thermo"`, `"bruker"` or `"waters"`).
    pub fn name(self) -> &'static str {
        // Exhaustive on purpose: adding a variant must force a decision here.
        match self {
            VendorFormat::WatersRaw => "waters",
            VendorFormat::BrukerTdf => "bruker",
            VendorFormat::ThermoRaw => "thermo",
        }
    }
}
/// A path paired with the vendor format recognised for it.
///
/// [`detect_format`] produces values of this type; the conversion and
/// collection entry points in this file consume them.
#[derive(Debug, Clone)]
pub struct Detected {
/// Location of the raw data. `detect_format` stores an owned copy of the
/// path it was given.
pub path: PathBuf,
/// Vendor format associated with `path`.
pub format: VendorFormat,
}
/// Inspects `path` on disk and reports which vendor format it holds, if any.
///
/// * A directory containing both `analysis.tdf` and `analysis.tdf_bin` is
///   reported as [`VendorFormat::BrukerTdf`].
/// * Otherwise, a directory containing `_HEADER.TXT` is reported as
///   [`VendorFormat::WatersRaw`].
/// * A regular file accepted by `is_thermo_raw` is reported as
///   [`VendorFormat::ThermoRaw`].
///
/// Returns `None` for anything else, including paths that do not exist.
pub fn detect_format(path: &Path) -> Option<Detected> {
    let format = if path.is_dir() {
        // Directory layouts are recognised purely by marker files.
        let has_file = |name: &str| path.join(name).is_file();
        if has_file("analysis.tdf") && has_file("analysis.tdf_bin") {
            VendorFormat::BrukerTdf
        } else if has_file("_HEADER.TXT") {
            VendorFormat::WatersRaw
        } else {
            return None;
        }
    } else if path.is_file() && is_thermo_raw(path) {
        VendorFormat::ThermoRaw
    } else {
        return None;
    };

    Some(Detected {
        path: path.to_path_buf(),
        format,
    })
}
/// Returns `true` when the file at `path` carries the "Finnigan" signature.
///
/// The first 18 bytes are read and bytes `2..18` are compared against
/// `"Finnigan"` encoded as UTF-16LE; the two leading bytes are not inspected.
/// Any failure to open the file or to read a full 18 bytes yields `false`.
fn is_thermo_raw(path: &Path) -> bool {
    use std::io::Read;

    // Number of leading bytes skipped before the signature.
    const SIGNATURE_OFFSET: usize = 2;
    // "Finnigan" as UTF-16LE: each ASCII byte followed by a zero byte.
    const FINNIGAN_UTF16LE: [u8; 16] = *b"F\0i\0n\0n\0i\0g\0a\0n\0";

    let mut header = [0u8; SIGNATURE_OFFSET + FINNIGAN_UTF16LE.len()];
    match std::fs::File::open(path) {
        Ok(mut file) => {
            file.read_exact(&mut header).is_ok() && header[SIGNATURE_OFFSET..] == FINNIGAN_UTF16LE
        }
        Err(_) => false,
    }
}
/// Converts the detected raw data to mzML and writes it to the file `output`.
///
/// `output` is created (or truncated if it already exists) and written
/// through a buffered writer. When `indexed` is `true` the indexed mzML
/// writer of the vendor backend is used, otherwise the plain one.
///
/// # Errors
///
/// Returns an error if `output` cannot be created, if the vendor conversion
/// fails (including [`Error::FeatureDisabled`] when the matching Cargo
/// feature is off), or if the final flush of buffered data to disk fails.
// `detected` is only read here; the by-value signature is kept for callers.
#[allow(clippy::needless_pass_by_value)]
pub fn convert_to_mzml(
    detected: Detected,
    output: &Path,
    indexed: bool,
) -> Result<()> {
    use std::fs::File;
    use std::io::{BufWriter, Write};

    let mut w = BufWriter::new(File::create(output)?);
    write_to(detected.format, &detected.path, &mut w, indexed)?;
    // Dropping a `BufWriter` flushes it but discards any error, which would
    // let a truncated file be reported as success. Flush explicitly instead.
    w.flush()?;
    Ok(())
}
#[allow(clippy::needless_pass_by_value)]
pub fn convert_to_mzml_writer<W: std::io::Write>(
detected: Detected,
writer: &mut W,
indexed: bool,
) -> Result<()> {
write_to(detected.format, &detected.path, writer, indexed)
}
/// Dispatches an mzML conversion of `path` to the backend for `format`.
///
/// Each vendor arm is compiled in only when its Cargo feature is enabled;
/// otherwise the arm returns [`Error::FeatureDisabled`] naming the vendor.
fn write_to(
    format: VendorFormat,
    path: &Path,
    w: &mut impl std::io::Write,
    indexed: bool,
) -> Result<()> {
    match format {
        VendorFormat::ThermoRaw => {
            #[cfg(feature = "thermo")]
            {
                thermo_convert(path, w, indexed)
            }
            #[cfg(not(feature = "thermo"))]
            {
                // Touch the arguments so they are not flagged as unused.
                let _ = (path, w, indexed);
                Err(Error::FeatureDisabled { vendor: "thermo" })
            }
        }
        VendorFormat::BrukerTdf => {
            #[cfg(feature = "bruker")]
            {
                bruker_convert(path, w, indexed)
            }
            #[cfg(not(feature = "bruker"))]
            {
                // Touch the arguments so they are not flagged as unused.
                let _ = (path, w, indexed);
                Err(Error::FeatureDisabled { vendor: "bruker" })
            }
        }
        VendorFormat::WatersRaw => {
            #[cfg(feature = "waters")]
            {
                waters_convert(path, w, indexed)
            }
            #[cfg(not(feature = "waters"))]
            {
                // Touch the arguments so they are not flagged as unused.
                let _ = (path, w, indexed);
                Err(Error::FeatureDisabled { vendor: "waters" })
            }
        }
    }
}

/// Writes the Bruker data at `path` as (optionally indexed) mzML to `out`.
#[cfg(feature = "bruker")]
fn bruker_convert(
    path: &Path,
    out: &mut impl std::io::Write,
    indexed: bool,
) -> Result<()> {
    if indexed {
        opentimstdf::mzml::write_indexed_mzml(path, out)?;
    } else {
        opentimstdf::mzml::write_mzml(path, out)?;
    }
    Ok(())
}

/// Writes the Waters data at `path` as (optionally indexed) mzML to `out`.
#[cfg(feature = "waters")]
fn waters_convert(
    path: &Path,
    out: &mut impl std::io::Write,
    indexed: bool,
) -> Result<()> {
    if indexed {
        openwraw::mzml::write_indexed_mzml(path, out)?;
    } else {
        openwraw::mzml::write_mzml(path, out)?;
    }
    Ok(())
}
/// Writes the Thermo RAW file at `path` as (optionally indexed) mzML to `out`.
///
/// The file is opened twice: once through `opentfraw::RawFileReader` and once
/// as a buffered byte stream handed to the mzML writer. The file name (or
/// `"unknown.raw"` when it is missing or not valid UTF-8) is passed along to
/// the writer.
#[cfg(feature = "thermo")]
fn thermo_convert(
    path: &Path,
    out: &mut impl std::io::Write,
    indexed: bool,
) -> Result<()> {
    // Capacity of the buffered reader over the raw file: 2 MiB.
    const SOURCE_BUFFER_BYTES: usize = 2 << 20;

    let reader = opentfraw::RawFileReader::open_path(path)?;
    let file = std::fs::File::open(path)?;
    let mut stream = std::io::BufReader::with_capacity(SOURCE_BUFFER_BYTES, file);

    let display_name = match path.file_name().and_then(|name| name.to_str()) {
        Some(name) => name,
        None => "unknown.raw",
    };

    // NOTE(review): the trailing `false` is an opentfraw option whose meaning
    // is not visible in this file — confirm against the opentfraw API.
    if indexed {
        opentfraw::mzml::write_indexed_mzml(&reader, &mut stream, out, display_name, false)?;
    } else {
        opentfraw::mzml::write_mzml(&reader, &mut stream, out, display_name, false)?;
    }
    Ok(())
}
/// Reads every spectrum of the detected run into memory.
///
/// Opens the vendor source for `detected.format`, takes its run metadata and
/// then collects all records yielded by `iter_spectra()` into a `Vec`.
///
/// # Errors
///
/// Returns [`Error::FeatureDisabled`] when the Cargo feature for the vendor
/// is off, and otherwise propagates failures from opening the source.
#[allow(clippy::needless_pass_by_value)]
pub fn collect(
    detected: Detected,
) -> Result<(Vec<openproteo_core::SpectrumRecord>, openproteo_core::RunMetadata)> {
    // Unused when every vendor feature is disabled, hence the allow.
    #[allow(unused_imports)]
    use openproteo_core::SpectrumSource;

    match detected.format {
        VendorFormat::ThermoRaw => {
            #[cfg(feature = "thermo")]
            {
                // Same 2 MiB buffered-stream setup as the mzML conversion path.
                let reader = opentfraw::RawFileReader::open_path(&detected.path)?;
                let file = std::fs::File::open(&detected.path)?;
                let mut stream = std::io::BufReader::with_capacity(2 << 20, file);
                let display_name = detected
                    .path
                    .file_name()
                    .and_then(|name| name.to_str())
                    .unwrap_or("unknown.raw");
                let mut spectra = opentfraw::mzml::OpenTfRawSource::new(
                    &reader,
                    &mut stream,
                    display_name,
                    false,
                );
                let metadata = spectra.run_metadata();
                let records = spectra.iter_spectra().collect::<Vec<_>>();
                Ok((records, metadata))
            }
            #[cfg(not(feature = "thermo"))]
            Err(Error::FeatureDisabled { vendor: "thermo" })
        }
        VendorFormat::BrukerTdf => {
            #[cfg(feature = "bruker")]
            {
                let mut spectra = opentimstdf::mzml::TdfSource::open(&detected.path)?;
                let metadata = spectra.run_metadata();
                let records = spectra.iter_spectra().collect::<Vec<_>>();
                Ok((records, metadata))
            }
            #[cfg(not(feature = "bruker"))]
            Err(Error::FeatureDisabled { vendor: "bruker" })
        }
        VendorFormat::WatersRaw => {
            #[cfg(feature = "waters")]
            {
                let mut spectra = openwraw::mzml::WatersSource::open(&detected.path)?;
                let metadata = spectra.run_metadata();
                let records = spectra.iter_spectra().collect::<Vec<_>>();
                Ok((records, metadata))
            }
            #[cfg(not(feature = "waters"))]
            Err(Error::FeatureDisabled { vendor: "waters" })
        }
    }
}
/// An in-memory spectrum source backed by a `Vec` of records.
///
/// It stores exactly the two pieces a run consists of in this crate: the run
/// metadata and the list of spectrum records.
pub struct VecSource {
    /// Metadata handed out (as a clone) by `run_metadata()`.
    pub metadata: openproteo_core::RunMetadata,
    /// Records still waiting to be yielded by `iter_spectra()`.
    pub records: Vec<openproteo_core::SpectrumRecord>,
}

impl VecSource {
    /// Builds a source from already-loaded `metadata` and `records`.
    pub fn new(
        metadata: openproteo_core::RunMetadata,
        records: Vec<openproteo_core::SpectrumRecord>,
    ) -> Self {
        VecSource { metadata, records }
    }
}
impl openproteo_core::SpectrumSource for VecSource {
    /// Returns a clone of the stored metadata.
    fn run_metadata(&self) -> openproteo_core::RunMetadata {
        self.metadata.clone()
    }

    /// Yields the stored records by draining them out of `self.records`.
    ///
    /// The records are moved out rather than cloned, so `self.records` is
    /// empty once the returned iterator has been consumed or dropped.
    fn iter_spectra<'src>(
        &'src mut self,
    ) -> Box<dyn Iterator<Item = openproteo_core::SpectrumRecord> + 'src> {
        let drained = self.records.drain(..);
        Box::new(drained)
    }

    /// Reports how many records are currently stored.
    fn spectrum_count_hint(&self) -> Option<usize> {
        let remaining = self.records.len();
        Some(remaining)
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;

    /// Returns a path under the system temp directory that no other call in
    /// this test process has received.
    ///
    /// Uniqueness comes from the process id plus a per-process counter, so
    /// tests running on parallel (or recycled) threads never share a path.
    fn tempfile_path() -> PathBuf {
        use std::sync::atomic::{AtomicUsize, Ordering};

        static NEXT_ID: AtomicUsize = AtomicUsize::new(0);

        let pid = std::process::id();
        let seq = NEXT_ID.fetch_add(1, Ordering::Relaxed);
        std::env::temp_dir().join(format!("msio-test-{pid}-{seq}"))
    }

    /// Creates a fresh, empty temp directory and returns its path.
    fn tempfile_dir() -> PathBuf {
        let p = tempfile_path();
        // Fail here, with a clear message, rather than later on a confusing
        // write error inside the test body.
        std::fs::create_dir_all(&p).expect("create temp dir for test");
        p
    }

    /// A file without the Finnigan signature is not recognised.
    #[test]
    fn detect_returns_none_for_garbage_file() {
        let tmp = tempfile_path();
        std::fs::write(&tmp, b"hello").unwrap();
        assert!(detect_format(&tmp).is_none());
        let _ = std::fs::remove_file(&tmp);
    }

    /// Two arbitrary bytes followed by "Finnigan" in UTF-16LE is Thermo RAW.
    #[test]
    fn detect_returns_thermo_for_finnigan_magic() {
        let tmp = tempfile_path();
        let mut f = std::fs::File::create(&tmp).unwrap();
        f.write_all(&[
            0x01, 0xa1, 0x46, 0x00, 0x69, 0x00, 0x6e, 0x00, 0x6e, 0x00, 0x69, 0x00, 0x67, 0x00,
            0x61, 0x00, 0x6e, 0x00, 0xff, 0xff,
        ])
        .unwrap();
        // Close the write handle before the file is reopened for detection.
        drop(f);
        let det = detect_format(&tmp).expect("detect");
        assert_eq!(det.format, VendorFormat::ThermoRaw);
        let _ = std::fs::remove_file(&tmp);
    }

    /// A directory with both TDF marker files is Bruker.
    #[test]
    fn detect_returns_bruker_for_tdf_layout() {
        let tmp = tempfile_dir();
        std::fs::write(tmp.join("analysis.tdf"), b"").unwrap();
        std::fs::write(tmp.join("analysis.tdf_bin"), b"").unwrap();
        let det = detect_format(&tmp).expect("detect");
        assert_eq!(det.format, VendorFormat::BrukerTdf);
        let _ = std::fs::remove_dir_all(&tmp);
    }

    /// A directory with `_HEADER.TXT` is Waters.
    #[test]
    fn detect_returns_waters_for_header_layout() {
        let tmp = tempfile_dir();
        std::fs::write(tmp.join("_HEADER.TXT"), b"$$ FAKE\n").unwrap();
        let det = detect_format(&tmp).expect("detect");
        assert_eq!(det.format, VendorFormat::WatersRaw);
        let _ = std::fs::remove_dir_all(&tmp);
    }

    /// The error type converts from `io::Error` and renders the expected
    /// message for a disabled vendor feature.
    #[test]
    fn convert_unsupported_format_returns_typed_error() {
        let e: Error = std::io::Error::other("boom").into();
        assert!(matches!(e, Error::Io(_)));
        let e = Error::FeatureDisabled { vendor: "thermo" };
        assert_eq!(e.to_string(), "openproteo-io was built without the 'thermo' feature");
        let e = Error::UnsupportedFormat(PathBuf::from("/tmp/nope"));
        assert!(matches!(e, Error::UnsupportedFormat(_)));
    }
}