1#![forbid(unsafe_code)]
15
16use std::path::{Path, PathBuf};
17
18mod error;
19pub use error::{Error, Result};
20
21pub use openproteo_core as core;
22
23#[cfg(feature = "arrow")]
24pub use openproteo_core::arrow;
25
/// Re-exports of the vendor reader crates this crate dispatches to.
///
/// Each re-export is compiled in only when the matching Cargo feature is
/// enabled, so the set of items in this module depends on the build
/// configuration.
pub mod vendor {
    /// Reader crate used for the Thermo RAW code paths (`thermo` feature).
    #[cfg(feature = "thermo")]
    pub use opentfraw;
    /// Reader crate used for the Bruker TDF code paths (`bruker` feature).
    #[cfg(feature = "bruker")]
    pub use opentimstdf;
    /// Reader crate used for the Waters RAW code paths (`waters` feature).
    #[cfg(feature = "waters")]
    pub use openwraw;
}
35
/// Vendor raw-data layouts that [`detect_format`] is able to recognise.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum VendorFormat {
    /// Thermo RAW: a single file carrying the UTF-16LE "Finnigan" signature.
    ThermoRaw,
    /// Bruker TDF: a directory holding `analysis.tdf` and `analysis.tdf_bin`.
    BrukerTdf,
    /// Waters RAW: a directory holding `_HEADER.TXT`.
    WatersRaw,
}

impl VendorFormat {
    /// Returns the short lowercase vendor label for this format.
    ///
    /// The labels are the same strings used as Cargo feature names and in
    /// `Error::FeatureDisabled { vendor }` elsewhere in this file.
    pub fn name(self) -> &'static str {
        match self {
            VendorFormat::ThermoRaw => "thermo",
            VendorFormat::BrukerTdf => "bruker",
            VendorFormat::WatersRaw => "waters",
        }
    }
}
59
/// A path paired with the vendor layout recognised at it.
///
/// [`detect_format`] returns this on success; the conversion and collection
/// entry points in this file take it as input.
#[derive(Debug, Clone)]
pub struct Detected {
    /// Location of the vendor data. `detect_format` stores an owned copy of
    /// the path it was given, without canonicalising it.
    pub path: PathBuf,
    /// The vendor layout found at `path`.
    pub format: VendorFormat,
}
70
71pub fn detect_format(path: &Path) -> Option<Detected> {
78 if path.is_dir() {
79 if path.join("analysis.tdf").is_file() && path.join("analysis.tdf_bin").is_file() {
81 return Some(Detected {
82 path: path.to_path_buf(),
83 format: VendorFormat::BrukerTdf,
84 });
85 }
86 if path.join("_HEADER.TXT").is_file() {
87 return Some(Detected {
88 path: path.to_path_buf(),
89 format: VendorFormat::WatersRaw,
90 });
91 }
92 return None;
93 }
94 if path.is_file() {
95 if is_thermo_raw(path) {
96 return Some(Detected {
97 path: path.to_path_buf(),
98 format: VendorFormat::ThermoRaw,
99 });
100 }
101 return None;
102 }
103 None
104}
105
/// Returns `true` when the file at `path` carries the Thermo "Finnigan" signature.
///
/// The first 18 bytes are read; bytes 2..18 must equal "Finnigan" encoded as
/// UTF-16LE. The two leading bytes are not inspected. Any failure to open the
/// file or to read a full 18 bytes yields `false`.
fn is_thermo_raw(path: &Path) -> bool {
    use std::io::Read;

    /// Number of leading bytes skipped before the signature.
    const SIGNATURE_OFFSET: usize = 2;
    /// "Finnigan" as UTF-16LE: each ASCII byte followed by a zero byte.
    const SIGNATURE: &[u8; 16] = b"F\0i\0n\0n\0i\0g\0a\0n\0";

    let mut header = [0u8; 18];
    let header_read = std::fs::File::open(path)
        .and_then(|mut file| file.read_exact(&mut header))
        .is_ok();

    header_read && header[SIGNATURE_OFFSET..] == SIGNATURE[..]
}
127
128#[allow(clippy::needless_pass_by_value)] pub fn convert_to_mzml(detected: Detected, output: &Path, indexed: bool) -> Result<()> {
133 use std::fs::File;
134 use std::io::BufWriter;
135 let f = File::create(output)?;
136 let mut w = BufWriter::new(f);
137 write_to(detected.format, &detected.path, &mut w, indexed)
138}
139
140#[allow(clippy::needless_pass_by_value)]
144pub fn convert_to_mzml_writer<W: std::io::Write>(
145 detected: Detected,
146 writer: &mut W,
147 indexed: bool,
148) -> Result<()> {
149 write_to(detected.format, &detected.path, writer, indexed)
150}
151
/// Dispatches an mzML conversion to the vendor crate matching `format`.
///
/// `path` is the vendor data location, `w` receives the mzML output and
/// `indexed` picks the vendor's `write_indexed_mzml` over `write_mzml`.
///
/// Every arm has two mutually exclusive cfg-gated blocks: the real conversion
/// when the vendor feature is enabled, and an `Error::FeatureDisabled` result
/// when it is not. Exactly one of the two is compiled, and it forms the arm's
/// value.
fn write_to(
    format: VendorFormat,
    path: &Path,
    w: &mut impl std::io::Write,
    indexed: bool,
) -> Result<()> {
    match format {
        VendorFormat::ThermoRaw => {
            #[cfg(feature = "thermo")]
            {
                thermo_convert(path, w, indexed)
            }
            #[cfg(not(feature = "thermo"))]
            {
                // Touch the parameters so they do not count as unused when
                // this is the only code left referring to them.
                let _ = (path, w, indexed);
                Err(Error::FeatureDisabled { vendor: "thermo" })
            }
        }
        VendorFormat::BrukerTdf => {
            #[cfg(feature = "bruker")]
            {
                if indexed {
                    opentimstdf::mzml::write_indexed_mzml(path, w)?;
                } else {
                    opentimstdf::mzml::write_mzml(path, w)?;
                }
                Ok(())
            }
            #[cfg(not(feature = "bruker"))]
            {
                // Same unused-parameter guard as in the Thermo arm.
                let _ = (path, w, indexed);
                Err(Error::FeatureDisabled { vendor: "bruker" })
            }
        }
        VendorFormat::WatersRaw => {
            #[cfg(feature = "waters")]
            {
                if indexed {
                    openwraw::mzml::write_indexed_mzml(path, w)?;
                } else {
                    openwraw::mzml::write_mzml(path, w)?;
                }
                Ok(())
            }
            #[cfg(not(feature = "waters"))]
            {
                // Same unused-parameter guard as in the Thermo arm.
                let _ = (path, w, indexed);
                Err(Error::FeatureDisabled { vendor: "waters" })
            }
        }
    }
}
204
/// Converts the Thermo RAW file at `path` to mzML, writing the result to `out`.
///
/// The file is opened twice: once through `opentfraw::RawFileReader::open_path`
/// and once as a buffered byte stream handed to the mzML writer. The file
/// name passed to the writer is the final component of `path`, falling back to
/// `"unknown.raw"` when there is none or it is not valid UTF-8.
///
/// NOTE(review): the trailing `false` passed to both writers is an
/// `opentfraw` option whose meaning is not visible from this file.
#[cfg(feature = "thermo")]
fn thermo_convert(path: &Path, out: &mut impl std::io::Write, indexed: bool) -> Result<()> {
    /// Capacity of the read buffer wrapped around the raw file (2 MiB).
    const READ_BUFFER_BYTES: usize = 2 << 20;

    let reader = opentfraw::RawFileReader::open_path(path)?;
    let mut stream =
        std::io::BufReader::with_capacity(READ_BUFFER_BYTES, std::fs::File::open(path)?);
    let run_name = path
        .file_name()
        .and_then(std::ffi::OsStr::to_str)
        .unwrap_or("unknown.raw");

    if indexed {
        opentfraw::mzml::write_indexed_mzml(&reader, &mut stream, out, run_name, false)?;
    } else {
        opentfraw::mzml::write_mzml(&reader, &mut stream, out, run_name, false)?;
    }
    Ok(())
}
222
/// Reads every spectrum of the detected run into memory.
///
/// Returns the collected spectrum records together with the run metadata
/// reported by the vendor source, as `(records, metadata)`.
///
/// # Errors
///
/// Returns `Error::FeatureDisabled` when the vendor feature for
/// `detected.format` is not compiled in, and otherwise propagates any error
/// from opening the vendor data.
// `Detected` is taken by value as part of the public signature.
#[allow(clippy::needless_pass_by_value)]
pub fn collect(
    detected: Detected,
) -> Result<(
    Vec<openproteo_core::SpectrumRecord>,
    openproteo_core::RunMetadata,
)> {
    // The trait is only used inside the cfg-gated blocks below, so the import
    // is unused when every vendor feature is disabled.
    #[allow(unused_imports)]
    use openproteo_core::SpectrumSource;
    match detected.format {
        VendorFormat::ThermoRaw => {
            #[cfg(feature = "thermo")]
            {
                use std::fs::File;
                use std::io::BufReader;
                let raw = opentfraw::RawFileReader::open_path(&detected.path)?;
                // Second handle on the same file, buffered with a 2 MiB capacity.
                let mut source = BufReader::with_capacity(2 << 20, File::open(&detected.path)?);
                // Final path component, or a placeholder when it is missing
                // or not valid UTF-8.
                let filename = detected
                    .path
                    .file_name()
                    .and_then(|n| n.to_str())
                    .unwrap_or("unknown.raw");
                // NOTE(review): the meaning of the trailing `false` is defined
                // by `opentfraw` and is not visible from this file.
                let mut src =
                    opentfraw::mzml::OpenTfRawSource::new(&raw, &mut source, filename, false);
                // Metadata is fetched before the spectra are iterated.
                let meta = src.run_metadata();
                let recs: Vec<_> = src.iter_spectra().collect();
                Ok((recs, meta))
            }
            #[cfg(not(feature = "thermo"))]
            Err(Error::FeatureDisabled { vendor: "thermo" })
        }
        VendorFormat::BrukerTdf => {
            #[cfg(feature = "bruker")]
            {
                let mut src = opentimstdf::mzml::TdfSource::open(&detected.path)?;
                let meta = src.run_metadata();
                let recs: Vec<_> = src.iter_spectra().collect();
                Ok((recs, meta))
            }
            #[cfg(not(feature = "bruker"))]
            Err(Error::FeatureDisabled { vendor: "bruker" })
        }
        VendorFormat::WatersRaw => {
            #[cfg(feature = "waters")]
            {
                let mut src = openwraw::mzml::WatersSource::open(&detected.path)?;
                let meta = src.run_metadata();
                let recs: Vec<_> = src.iter_spectra().collect();
                Ok((recs, meta))
            }
            #[cfg(not(feature = "waters"))]
            Err(Error::FeatureDisabled { vendor: "waters" })
        }
    }
}
286
/// An in-memory spectrum source: run metadata plus an owned list of records.
///
/// Its `SpectrumSource` implementation in this file drains `records` when the
/// spectra are iterated, so the records are handed out at most once.
pub struct VecSource {
    /// Metadata returned (as a clone) by `run_metadata`.
    pub metadata: openproteo_core::RunMetadata,
    /// Records not yet handed out by `iter_spectra`.
    pub records: Vec<openproteo_core::SpectrumRecord>,
}
295
296impl VecSource {
297 pub fn new(
298 metadata: openproteo_core::RunMetadata,
299 records: Vec<openproteo_core::SpectrumRecord>,
300 ) -> Self {
301 Self { metadata, records }
302 }
303}
304
305impl openproteo_core::SpectrumSource for VecSource {
306 fn run_metadata(&self) -> openproteo_core::RunMetadata {
307 self.metadata.clone()
308 }
309 fn iter_spectra<'s>(
310 &'s mut self,
311 ) -> Box<dyn Iterator<Item = openproteo_core::SpectrumRecord> + 's> {
312 Box::new(self.records.drain(..))
313 }
314 fn spectrum_count_hint(&self) -> Option<usize> {
315 Some(self.records.len())
316 }
317}
318
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use std::sync::atomic::{AtomicUsize, Ordering};

    /// Returns a path under the system temp directory that no other test in
    /// this process will receive, with nothing existing at it on return.
    fn tempfile_path() -> PathBuf {
        // A process-wide counter keeps names unique even when tests run on
        // parallel threads. The address of a stack local is not a reliable
        // discriminator: thread stacks get reused.
        static NEXT_ID: AtomicUsize = AtomicUsize::new(0);
        let id = NEXT_ID.fetch_add(1, Ordering::Relaxed);
        let pid = std::process::id();
        let mut p = std::env::temp_dir();
        p.push(format!("msio-test-{pid}-{id}"));
        // Best-effort removal of leftovers from an earlier aborted run that
        // happened to have the same pid.
        let _ = std::fs::remove_file(&p);
        let _ = std::fs::remove_dir_all(&p);
        p
    }

    /// Creates a fresh, empty temp directory and returns its path.
    fn tempfile_dir() -> PathBuf {
        let p = tempfile_path();
        // Fail here rather than with a confusing write error later in the test.
        std::fs::create_dir_all(&p).expect("create temp dir");
        p
    }

    #[test]
    fn detect_returns_none_for_garbage_file() {
        let tmp = tempfile_path();
        std::fs::write(&tmp, b"hello").unwrap();
        assert!(detect_format(&tmp).is_none());
        let _ = std::fs::remove_file(&tmp);
    }

    #[test]
    fn detect_returns_thermo_for_finnigan_magic() {
        let tmp = tempfile_path();
        let mut f = std::fs::File::create(&tmp).unwrap();
        // Two leading bytes, "Finnigan" in UTF-16LE, then two trailing bytes.
        f.write_all(&[
            0x01, 0xa1, 0x46, 0x00, 0x69, 0x00, 0x6e, 0x00, 0x6e, 0x00, 0x69, 0x00, 0x67, 0x00,
            0x61, 0x00, 0x6e, 0x00, 0xff, 0xff,
        ])
        .unwrap();
        let det = detect_format(&tmp).expect("detect");
        assert_eq!(det.format, VendorFormat::ThermoRaw);
        let _ = std::fs::remove_file(&tmp);
    }

    #[test]
    fn detect_returns_bruker_for_tdf_layout() {
        let tmp = tempfile_dir();
        std::fs::write(tmp.join("analysis.tdf"), b"").unwrap();
        std::fs::write(tmp.join("analysis.tdf_bin"), b"").unwrap();
        let det = detect_format(&tmp).expect("detect");
        assert_eq!(det.format, VendorFormat::BrukerTdf);
        let _ = std::fs::remove_dir_all(&tmp);
    }

    #[test]
    fn detect_returns_waters_for_header_layout() {
        let tmp = tempfile_dir();
        std::fs::write(tmp.join("_HEADER.TXT"), b"$$ FAKE\n").unwrap();
        let det = detect_format(&tmp).expect("detect");
        assert_eq!(det.format, VendorFormat::WatersRaw);
        let _ = std::fs::remove_dir_all(&tmp);
    }

    #[test]
    fn convert_unsupported_format_returns_typed_error() {
        // Checks the error type's conversions and messages; no conversion is run.
        let e: Error = std::io::Error::other("boom").into();
        assert!(matches!(e, Error::Io(_)));
        let e = Error::FeatureDisabled { vendor: "thermo" };
        assert_eq!(
            e.to_string(),
            "openproteo-io was built without the 'thermo' feature"
        );
        let e = Error::UnsupportedFormat(PathBuf::from("/tmp/nope"));
        assert!(matches!(e, Error::UnsupportedFormat(_)));
    }
}