use std::collections::HashMap;
use std::io::{self, prelude::*, BufWriter};
use std::marker::PhantomData;
use std::str;
use mzpeaks::{
peak::KnownCharge, CentroidPeak, DeconvolutedPeak, IntensityMeasurement, MZLocated,
PeakCollection,
};
use crate::prelude::*;
use crate::meta::{
DataProcessing, FileDescription, InstrumentConfiguration, MSDataFileMetadata,
MassSpectrometryRun, Sample, Software,
};
use crate::params::{
ControlledVocabulary, ParamDescribed, ParamLike, ParamValue as _, CURIE,
};
use crate::spectrum::{
bindata::BinaryArrayMap,
IonProperties, Precursor, PrecursorSelection, RefPeakDataLevel, SignalContinuity,
SpectrumDescription, SpectrumLike,
};
/// CURIE for accession 1000796 of the `MS` controlled vocabulary. Used to look up a
/// spectrum's stored title parameter and to keep it out of the generic header parameters.
const TITLE_CV: CURIE = ControlledVocabulary::MS.curie(1000796);
/// CURIE for accession 1000511 of the `MS` controlled vocabulary (presumably the
/// "ms level" term, going by the constant name). Filtered out of the generic header parameters.
const MS_LEVEL_CV: CURIE = ControlledVocabulary::MS.curie(1000511);
/// CURIE for accession 1000580 of the `MS` controlled vocabulary (presumably the
/// "MSn spectrum" term, going by the constant name). Filtered out of the generic header parameters.
const MSN_SPECTRUM_CV: CURIE = ControlledVocabulary::MS.curie(1000580);
/// Strategy trait controlling which header lines an [`MGFWriterType`] emits for a spectrum.
///
/// Both methods are associated functions that receive the writer explicitly, and both
/// have default implementations, so an implementor may override either, both or neither.
pub trait MGFHeaderStyle: Sized {
    /// Write the style-specific header lines for `spectrum`.
    ///
    /// The default emits a single `SCANS=<index>` line, where `<index>` is the `index`
    /// field of the spectrum's description.
    ///
    /// # Errors
    /// Propagates any I/O error raised by the underlying writer.
    fn write_header<
        W: io::Write,
        C: CentroidLike,
        D: DeconvolutedCentroidLike,
        S: SpectrumLike<C, D>,
    >(
        writer: &mut MGFWriterType<W, C, D, Self>,
        spectrum: &S,
    ) -> io::Result<()> {
        let scan_index = spectrum.description().index.to_string();
        writer.write_kv("SCANS", &scan_index)
    }

    /// Write the precursor-related header lines.
    ///
    /// When the precursor has an ion, a `PEPMASS=<mz> <intensity>[ <charge>]` line is
    /// written, followed by one line per parameter of the ion and then of the precursor's
    /// activation. When the precursor carries a precursor id, a `PRECURSORSCAN=<id>` line
    /// is written afterwards.
    ///
    /// # Errors
    /// Propagates any I/O error raised by the underlying writer.
    fn write_precursor<W: io::Write, C: CentroidLike, D: DeconvolutedCentroidLike>(
        writer: &mut MGFWriterType<W, C, D, Self>,
        precursor: &Precursor,
    ) -> io::Result<()> {
        if let Some(ion) = precursor.ion() {
            write!(writer.handle, "PEPMASS={} {}", ion.mz, ion.intensity)?;
            // The charge is optional and, when present, goes on the same line.
            if let Some(charge) = ion.charge {
                write!(writer.handle, " {charge}")?;
            }
            writer.handle.write_all(b"\n")?;

            let ion_params = ion.params().iter();
            for param in ion_params.chain(precursor.activation.params()) {
                writer.write_param(param)?;
            }
        }

        match precursor.precursor_id() {
            Some(pid) => {
                writer.handle.write_all(b"PRECURSORSCAN=")?;
                writer.handle.write_all(pid.as_bytes())?;
                writer.handle.write_all(b"\n")
            }
            None => Ok(()),
        }
    }
}
/// Header style that uses the default [`MGFHeaderStyle`] methods unchanged: the
/// style-specific header is a single `SCANS=<index>` line.
#[derive(Debug, Clone, Copy)]
pub struct SimpleMGFStyle();
// No overrides: every method comes from the trait's default implementations.
impl MGFHeaderStyle for SimpleMGFStyle {}
/// Header style that records the spectrum's native id and its descriptive parameters
/// in addition to the scan index.
#[derive(Debug, Clone, Copy)]
pub struct MZDataMGFStyle();

impl MGFHeaderStyle for MZDataMGFStyle {
    /// Write `NATIVEID=<id>` and `SCANS=<index>`, then one line per parameter of the
    /// spectrum description, skipping parameters equal to `TITLE_CV`, `MSN_SPECTRUM_CV`
    /// or `MS_LEVEL_CV`.
    ///
    /// # Errors
    /// Propagates any I/O error raised by the underlying writer.
    fn write_header<
        W: io::Write,
        C: CentroidLike,
        D: DeconvolutedCentroidLike,
        S: SpectrumLike<C, D>,
    >(
        writer: &mut MGFWriterType<W, C, D, Self>,
        spectrum: &S,
    ) -> io::Result<()> {
        let desc = spectrum.description();
        writer.write_kv("NATIVEID", spectrum.id())?;
        let scan_index = desc.index.to_string();
        writer.write_kv("SCANS", &scan_index)?;

        for param in desc.params().iter() {
            // These three terms are excluded from the generic parameter listing.
            let is_excluded =
                TITLE_CV == *param || MSN_SPECTRUM_CV == *param || MS_LEVEL_CV == *param;
            if is_excluded {
                continue;
            }
            writer.write_param(param)?;
        }
        Ok(())
    }
}
/// Writer that serialises spectra as MGF text (`BEGIN IONS` … `END IONS` blocks) to any
/// [`io::Write`] sink.
///
/// Type parameters:
/// * `W` – the output sink, wrapped in a [`io::BufWriter`].
/// * `C` – centroid peak type of the spectra being written (defaults to `CentroidPeak`).
/// * `D` – deconvoluted peak type of the spectra being written (defaults to `DeconvolutedPeak`).
/// * `Y` – the [`MGFHeaderStyle`] deciding which header lines are emitted
///   (defaults to `MZDataMGFStyle`).
pub struct MGFWriterType<
W: io::Write,
C: CentroidLike = CentroidPeak,
D: DeconvolutedCentroidLike = DeconvolutedPeak,
Y: MGFHeaderStyle = MZDataMGFStyle,
> {
/// Buffered output handle; every byte of output goes through it.
pub handle: io::BufWriter<W>,
/// Initialised to 0 by `new`; no code visible in this section updates it.
pub offset: usize,
// Zero-sized markers tying the writer to its peak types.
centroid_type: PhantomData<C>,
deconvoluted_type: PhantomData<D>,
// File-level metadata held by the writer.
// NOTE(review): presumably exposed through `impl_metadata_trait!` — confirm against the macro.
file_description: FileDescription,
instrument_configurations: HashMap<u32, InstrumentConfiguration>,
softwares: Vec<Software>,
samples: Vec<Sample>,
data_processings: Vec<DataProcessing>,
// Zero-sized marker selecting the header style.
style_type: PhantomData<Y>,
// Run description; its `id` is used as the prefix of generated titles.
run: MassSpectrometryRun,
}
impl<W: io::Write, C: CentroidLike, D: DeconvolutedCentroidLike, Y: MGFHeaderStyle>
MGFWriterType<W, C, D, Y>
{
pub fn new(file: W) -> MGFWriterType<W, C, D, Y> {
let handle = io::BufWriter::with_capacity(500, file);
MGFWriterType {
handle,
offset: 0,
centroid_type: PhantomData,
deconvoluted_type: PhantomData,
file_description: Default::default(),
instrument_configurations: Default::default(),
softwares: Default::default(),
samples: Default::default(),
data_processings: Default::default(),
run: Default::default(),
style_type: PhantomData,
}
}
/// Build a fallback `TITLE` value for a spectrum that has no stored title.
///
/// The title has the shape
/// `<run>.<index>.<index>.<charge> [SourceFile:"<name>", ]NativeID:"<id>"`, where:
///
/// * `<run>` is the run description's id, or the literal `run` when there is none;
/// * `<index>` is the spectrum's index, written twice;
/// * `<charge>` is the charge of the first precursor ion, or the charge type's default
///   value when no precursor, ion or charge is available;
/// * the `SourceFile` part is present only when a source file name is known;
/// * `NativeID` is always present, so the spectrum's identifier can be recovered from
///   the title whichever metadata happens to be set.
pub fn make_title<S: SpectrumLike<C, D>>(&self, spectrum: &S) -> String {
    let idx = spectrum.index();
    let charge = spectrum
        .precursor()
        .and_then(|prec| prec.ion().and_then(|i| i.charge()))
        .unwrap_or_default();
    let id = spectrum.id();
    let run_id = self.run_description().and_then(|d| d.id.as_ref());
    let source_file = self.source_file_name();
    match (run_id, source_file) {
        (None, None) => format!("run.{idx}.{idx}.{charge} NativeID:\"{id}\""),
        // Same layout as the (run id, source file) case; the native id must not be
        // dropped just because the run id is unknown.
        (None, Some(source_name)) => format!(
            "run.{idx}.{idx}.{charge} SourceFile:\"{source_name}\", NativeID:\"{id}\""
        ),
        (Some(run_id), None) => format!("{run_id}.{idx}.{idx}.{charge} NativeID:\"{id}\""),
        (Some(run_id), Some(source_name)) => format!(
            "{run_id}.{idx}.{idx}.{charge} SourceFile:\"{source_name}\", NativeID:\"{id}\""
        ),
    }
}
/// Consume the writer and return its buffered output handle.
///
/// This method itself performs no flush; it only hands back the `BufWriter`.
pub fn into_inner(self) -> BufWriter<W> {
self.handle
}
pub fn write_param<P: ParamLike>(&mut self, param: &P) -> io::Result<()> {
self.handle
.write_all(param.name().to_uppercase().replace(' ', "_").as_bytes())?;
self.handle.write_all(b"=")?;
self.handle.write_all(¶m.value().as_bytes())?;
self.handle.write_all(b"\n")?;
Ok(())
}
/// Write a single `key=value` line, terminated by a newline, exactly as given
/// (no escaping or case conversion is applied).
///
/// # Errors
/// Propagates any I/O error raised by the underlying writer.
pub fn write_kv(&mut self, key: &str, value: &str) -> io::Result<()> {
    writeln!(self.handle, "{key}={value}")
}
/// Emit the precursor header lines by delegating to the header style `Y`.
fn write_precursor(&mut self, precursor: &Precursor) -> io::Result<()> {
    Y::write_precursor(self, precursor)
}
/// Write the header section of a spectrum, starting with the title *value*.
///
/// The caller is expected to have written the `TITLE=` prefix already. In order, this
/// writes:
///
/// 1. the title — the value of the spectrum's `TITLE_CV` parameter when present,
///    otherwise the result of [`Self::make_title`];
/// 2. `RTINSECONDS=<start_time * 60>`;
/// 3. the precursor lines for the first precursor, if there is one;
/// 4. the style-specific header lines from `Y`.
///
/// # Errors
/// Propagates any I/O error raised by the underlying writer.
pub fn write_header<T: SpectrumLike<C, D>>(&mut self, spectrum: &T) -> io::Result<()> {
    let desc = spectrum.description();

    let title = match desc.get_param_by_curie(&TITLE_CV) {
        Some(stored) => stored.value.clone(),
        None => self.make_title(spectrum).into(),
    };
    self.handle.write_all(&title.as_bytes())?;

    // The leading newline terminates the title line.
    let seconds = spectrum.start_time() * 60.0;
    write!(self.handle, "\nRTINSECONDS={seconds}\n")?;

    if let Some(precursor) = desc.precursor.first() {
        self.write_precursor(precursor)?;
    }
    Y::write_header(self, spectrum)
}
/// Write deconvoluted peaks as `<mz> <intensity> <charge>` lines in ascending m/z order.
///
/// The peaks are first converted to `DeconvolutedPeak` via `as_centroid` and sorted by
/// m/z with a stable sort, so the input slice does not need to be m/z-ordered.
fn write_deconvoluted_centroids(&mut self, centroids: &[D]) -> io::Result<()> {
    let mut by_mz: Vec<DeconvolutedPeak> =
        centroids.iter().map(|p| p.as_centroid()).collect();
    by_mz.sort_by(|lhs, rhs| lhs.mz().total_cmp(&rhs.mz()));

    for peak in &by_mz {
        writeln!(
            self.handle,
            "{} {} {}",
            peak.mz(),
            peak.intensity(),
            peak.charge()
        )?;
    }
    Ok(())
}
/// Write centroid peaks as `<mz> <intensity>` lines, in the order they appear in the slice.
fn write_centroids(&mut self, centroids: &[C]) -> io::Result<()> {
    centroids
        .iter()
        .try_for_each(|peak| writeln!(self.handle, "{} {}", peak.mz(), peak.intensity()))
}
fn write_arrays(
&mut self,
description: &SpectrumDescription,
arrays: &BinaryArrayMap,
) -> io::Result<()> {
match description.signal_continuity {
SignalContinuity::Centroid => {
for (mz, inten) in arrays.mzs()?.iter().zip(arrays.intensities()?.iter()) {
self.handle.write_all(mz.to_string().as_bytes())?;
self.handle.write_all(b" ")?;
self.handle.write_all(inten.to_string().as_bytes())?;
self.handle.write_all(b"\n")?;
}
}
SignalContinuity::Profile | SignalContinuity::Unknown => {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
"MGF spectrum must be centroided",
))
}
}
Ok(())
}
/// Write the peak list of `spectrum`, choosing the representation from the kind of
/// peak data it holds.
///
/// * `Missing` – nothing is written; a warning naming the spectrum id is logged.
/// * `RawData` – the raw arrays are written, unless the spectrum is a profile spectrum.
/// * `Centroid` – the centroid peaks are written as `<mz> <intensity>` lines.
/// * `Deconvoluted` – the deconvoluted peaks are written as `<mz> <intensity> <charge>` lines.
///
/// # Errors
/// * `Unsupported` when the spectrum holds raw data and is a profile spectrum.
/// * Any error from writing the raw arrays or from the underlying writer.
pub fn write_peaks<S: SpectrumLike<C, D> + 'static>(&mut self, spectrum: &S) -> io::Result<()> {
    let description = spectrum.description();
    match spectrum.peaks() {
        RefPeakDataLevel::Missing => {
            log::warn!(
                "Attempting to write a spectrum without any peak data, {}",
                description.id
            )
        }
        RefPeakDataLevel::RawData(arrays) => {
            // Reject profile data up front with a dedicated error kind.
            if description.signal_continuity == SignalContinuity::Profile {
                return Err(io::Error::new(
                    io::ErrorKind::Unsupported,
                    "Cannot write profile spectrum to MGF",
                ));
            }
            self.write_arrays(description, arrays)?
        }
        RefPeakDataLevel::Centroid(centroids) => {
            // Borrow the full peak collection as a slice.
            self.write_centroids(&centroids[0..centroids.len()])?
        }
        RefPeakDataLevel::Deconvoluted(deconvoluted) => {
            self.write_deconvoluted_centroids(&deconvoluted[0..deconvoluted.len()])?
        }
    }
    Ok(())
}
/// Write one spectrum as a complete `BEGIN IONS` … `END IONS` block.
///
/// Spectra whose description has `ms_level == 1` are not written; a warning is logged
/// instead.
///
/// # Returns
/// `Ok(0)` in every successful case, whether the spectrum was written or skipped.
///
/// # Errors
/// Propagates any error from writing the header, the peaks or the block delimiters.
pub fn write<S: SpectrumLike<C, D> + 'static>(&mut self, spectrum: &S) -> io::Result<usize> {
    let description = spectrum.description();
    let is_ms1 = description.ms_level == 1;
    if is_ms1 {
        log::warn!(
            "Attempted to write an MS1 spectrum to MGF, {}, skipping.",
            description.id
        );
        return Ok(0);
    }

    // `write_header` begins with the title value, so the `TITLE=` key is opened here.
    self.handle.write_all(b"BEGIN IONS\nTITLE=")?;
    self.write_header(spectrum)?;
    self.write_peaks(spectrum)?;
    self.handle.write_all(b"END IONS\n")?;
    Ok(0)
}
}
// Metadata access for the MGF writer, available for every header style `Y`.
impl<W: io::Write, C: CentroidLike, D: DeconvolutedCentroidLike, Y: MGFHeaderStyle>
MSDataFileMetadata for MGFWriterType<W, C, D, Y>
{
// NOTE(review): presumably expands to the remaining `MSDataFileMetadata` accessors,
// backed by the writer's metadata fields — confirm against the macro definition.
crate::impl_metadata_trait!();
/// Always returns `Some`, borrowing the writer's own run description.
fn run_description(&self) -> Option<&MassSpectrometryRun> {
Some(&self.run)
}
/// Always returns `Some`, mutably borrowing the writer's own run description.
fn run_description_mut(&mut self) -> Option<&mut MassSpectrometryRun> {
Some(&mut self.run)
}
}
/// [`SpectrumWriter`] implementation for the writer with its default header style.
impl<W: io::Write, C: CentroidLike + 'static, D: DeconvolutedCentroidLike + 'static>
    SpectrumWriter<C, D> for MGFWriterType<W, C, D>
{
    /// Write `spectrum` unless it is an MS1 spectrum, in which case it is skipped with a
    /// trace-level log message and `Ok(0)` is returned.
    fn write<S: SpectrumLike<C, D> + 'static>(&mut self, spectrum: &S) -> io::Result<usize> {
        if spectrum.ms_level() == 1 {
            log::trace!("Skipping writing MS1 spectrum {} to MGF", spectrum.id());
            return Ok(0);
        }
        // Method-call syntax picks the inherent `write` of `MGFWriterType` here, not
        // this trait method, so this does not recurse.
        self.write(spectrum)
    }

    /// Write every product spectrum of `group` and return the sum of the values
    /// returned by the individual writes.
    fn write_group<
        S: SpectrumLike<C, D> + 'static,
        G: super::super::SpectrumGrouping<C, D, S> + 'static,
    >(
        &mut self,
        group: &G,
    ) -> io::Result<usize> {
        let mut total = 0;
        for product in group.products() {
            total += self.write(product)?;
        }
        Ok(total)
    }

    /// Flush buffered output to the underlying sink.
    fn flush(&mut self) -> io::Result<()> {
        self.handle.flush()
    }

    /// Finish writing; this only flushes buffered output.
    fn close(&mut self) -> io::Result<()> {
        self.handle.flush()
    }
}
/// MGF writer over `CentroidPeak`/`DeconvolutedPeak` spectra using the `MZDataMGFStyle` header style.
pub type MGFWriter<W> = MGFWriterType<W, CentroidPeak, DeconvolutedPeak, MZDataMGFStyle>;
/// MGF writer over `CentroidPeak`/`DeconvolutedPeak` spectra using the `SimpleMGFStyle` header style.
pub type SimpleMGFWriter<W> = MGFWriterType<W, CentroidPeak, DeconvolutedPeak, SimpleMGFStyle>;
/// MGF writer using the `SimpleMGFStyle` header style, generic over the peak types `C` and `D`.
pub type SimpleMGFWriterType<W, C, D> = MGFWriterType<W, C, D, SimpleMGFStyle>;