use std::{fmt::Display, path::Path};
use context_error::{BasicKind, BoxedError};
use mzcore::{
ontology::Ontologies,
sequence::{Linked, SemiAmbiguous, SimpleLinear},
};
use serde::{Deserialize, Serialize};
use crate::*;
/// A file format for which the exact version is known.
///
/// Variants of versioned formats carry their concrete version type. `Fasta`, `MZTab`, and (only
/// with the `mzannotate` feature) `AnnotatedSpectrum` carry no version payload. The display name
/// of every variant is given by [`KnownFileFormat::name`], and `FileFormat::from` converts a
/// value into the matching [`FileFormat`] variant.
///
/// `Ord`/`PartialOrd` are derived, so the declaration order of the variants below determines how
/// values of this type compare.
#[derive(Clone, Copy, Debug, Deserialize, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize)]
#[allow(clippy::upper_case_acronyms, missing_docs)]
pub enum KnownFileFormat {
/// mzSpecLib, only available with the `mzannotate` feature
#[cfg(feature = "mzannotate")]
AnnotatedSpectrum,
/// CSV
BasicCSV(BasicCSVVersion),
/// DeepNovo Family
DeepNovoFamily(DeepNovoFamilyVersion),
/// Fasta, carries no version
Fasta,
/// InstaNovo
InstaNovo(InstaNovoVersion),
/// MaxQuant
MaxQuant(MaxQuantVersion),
/// MetaMorpheus
MetaMorpheus(MetaMorpheusVersion),
/// mzTab, carries no version payload (`version()` reports a fixed `1.0`)
MZTab,
/// NovoB
NovoB(NovoBVersion),
/// Novor
Novor(NovorVersion),
/// OPair
Opair(OpairVersion),
/// PEAKS
Peaks(PeaksVersion),
/// PepNet
PepNet(PepNetVersion),
/// π-HelixNovo
PiHelixNovo(PiHelixNovoVersion),
/// π-PrimeNovo
PiPrimeNovo(PiPrimeNovoVersion),
/// ProteinLynx Global Server
PLGS(PLGSVersion),
/// pLink
PLink(PLinkVersion),
/// PowerNovo
PowerNovo(PowerNovoVersion),
/// Proteoscape
Proteoscape(ProteoscapeVersion),
/// pUniFind
PUniFind(PUniFindVersion),
/// Sage
Sage(SageVersion),
/// MSFragger
MSFragger(MSFraggerVersion),
/// SpectrumSequenceList
SpectrumSequenceList(SpectrumSequenceListVersion),
}
impl KnownFileFormat {
    /// Get the display name of this format, without any version information.
    pub const fn name(self) -> &'static str {
        match self {
            // Formats that do not carry a version payload
            #[cfg(feature = "mzannotate")]
            Self::AnnotatedSpectrum => "mzSpecLib",
            Self::Fasta => "Fasta",
            Self::MZTab => "mzTab",
            // Formats that carry a version payload
            Self::BasicCSV(..) => "CSV",
            Self::DeepNovoFamily(..) => "DeepNovo Family",
            Self::InstaNovo(..) => "InstaNovo",
            Self::MaxQuant(..) => "MaxQuant",
            Self::MetaMorpheus(..) => "MetaMorpheus",
            Self::MSFragger(..) => "MSFragger",
            Self::NovoB(..) => "NovoB",
            Self::Novor(..) => "Novor",
            Self::Opair(..) => "OPair",
            Self::Peaks(..) => "PEAKS",
            Self::PepNet(..) => "PepNet",
            Self::PiHelixNovo(..) => "π-HelixNovo",
            Self::PiPrimeNovo(..) => "π-PrimeNovo",
            Self::PLGS(..) => "ProteinLynx Global Server",
            Self::PLink(..) => "pLink",
            Self::PowerNovo(..) => "PowerNovo",
            Self::Proteoscape(..) => "Proteoscape",
            Self::PUniFind(..) => "pUniFind",
            Self::Sage(..) => "Sage",
            Self::SpectrumSequenceList(..) => "SpectrumSequenceList",
        }
    }

    /// Get the version of this format as text.
    ///
    /// Returns `None` for the formats without any version (`Fasta`, and `AnnotatedSpectrum` when
    /// the `mzannotate` feature is enabled). `MZTab` always reports `1.0`. Every other format
    /// reports the `to_string()` of the version it carries.
    pub fn version(self) -> Option<String> {
        let text = match self {
            // No version available at all
            #[cfg(feature = "mzannotate")]
            Self::AnnotatedSpectrum => return None,
            Self::Fasta => return None,
            // Fixed version, this variant has no payload to ask
            Self::MZTab => "1.0".to_string(),
            // Version taken from the payload
            Self::BasicCSV(v) => v.to_string(),
            Self::DeepNovoFamily(v) => v.to_string(),
            Self::InstaNovo(v) => v.to_string(),
            Self::MaxQuant(v) => v.to_string(),
            Self::MetaMorpheus(v) => v.to_string(),
            Self::MSFragger(v) => v.to_string(),
            Self::NovoB(v) => v.to_string(),
            Self::Novor(v) => v.to_string(),
            Self::Opair(v) => v.to_string(),
            Self::Peaks(v) => v.to_string(),
            Self::PepNet(v) => v.to_string(),
            Self::PiHelixNovo(v) => v.to_string(),
            Self::PiPrimeNovo(v) => v.to_string(),
            Self::PLGS(v) => v.to_string(),
            Self::PLink(v) => v.to_string(),
            Self::PowerNovo(v) => v.to_string(),
            Self::Proteoscape(v) => v.to_string(),
            Self::PUniFind(v) => v.to_string(),
            Self::Sage(v) => v.to_string(),
            Self::SpectrumSequenceList(v) => v.to_string(),
        };
        Some(text)
    }
}
impl Display for KnownFileFormat {
    /// Write the format name, followed by ` - <version>` when the format reports a version.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(self.name())?;
        match self.version() {
            Some(version) => write!(f, " - {version}"),
            // Formats without a version are shown by name only
            None => Ok(()),
        }
    }
}
impl From<KnownFileFormat> for FileFormat {
    /// Convert into the variant of the same name, wrapping any known version in `Some`.
    fn from(value: KnownFileFormat) -> Self {
        match value {
            // Variants without a version payload map one to one
            #[cfg(feature = "mzannotate")]
            KnownFileFormat::AnnotatedSpectrum => Self::AnnotatedSpectrum,
            KnownFileFormat::Fasta => Self::Fasta,
            KnownFileFormat::MZTab => Self::MZTab,
            // Variants with a version payload keep that version
            KnownFileFormat::BasicCSV(v) => Self::BasicCSV(Some(v)),
            KnownFileFormat::DeepNovoFamily(v) => Self::DeepNovoFamily(Some(v)),
            KnownFileFormat::InstaNovo(v) => Self::InstaNovo(Some(v)),
            KnownFileFormat::MaxQuant(v) => Self::MaxQuant(Some(v)),
            KnownFileFormat::MetaMorpheus(v) => Self::MetaMorpheus(Some(v)),
            KnownFileFormat::MSFragger(v) => Self::MSFragger(Some(v)),
            KnownFileFormat::NovoB(v) => Self::NovoB(Some(v)),
            KnownFileFormat::Novor(v) => Self::Novor(Some(v)),
            KnownFileFormat::Opair(v) => Self::Opair(Some(v)),
            KnownFileFormat::Peaks(v) => Self::Peaks(Some(v)),
            KnownFileFormat::PepNet(v) => Self::PepNet(Some(v)),
            KnownFileFormat::PiHelixNovo(v) => Self::PiHelixNovo(Some(v)),
            KnownFileFormat::PiPrimeNovo(v) => Self::PiPrimeNovo(Some(v)),
            KnownFileFormat::PLGS(v) => Self::PLGS(Some(v)),
            KnownFileFormat::PLink(v) => Self::PLink(Some(v)),
            KnownFileFormat::PowerNovo(v) => Self::PowerNovo(Some(v)),
            KnownFileFormat::Proteoscape(v) => Self::Proteoscape(Some(v)),
            KnownFileFormat::PUniFind(v) => Self::PUniFind(Some(v)),
            KnownFileFormat::Sage(v) => Self::Sage(Some(v)),
            KnownFileFormat::SpectrumSequenceList(v) => Self::SpectrumSequenceList(Some(v)),
        }
    }
}
/// A file format with an optional version, used to select the parser in [`FileFormat::open`].
///
/// The variants mirror [`KnownFileFormat`], but every version is wrapped in an `Option`. The
/// version is passed unchanged to the parser by `open`.
/// NOTE(review): `None` presumably lets the parser determine the version itself; confirm against
/// the `parse_file` implementations.
///
/// The default value is [`FileFormat::Undefined`].
#[derive(Clone, Copy, Debug, Default, Deserialize, Eq, PartialEq, Serialize)]
#[allow(clippy::upper_case_acronyms, missing_docs)]
pub enum FileFormat {
/// Only available with the `mzannotate` feature
#[cfg(feature = "mzannotate")]
AnnotatedSpectrum,
BasicCSV(Option<BasicCSVVersion>),
DeepNovoFamily(Option<DeepNovoFamilyVersion>),
/// Carries no version
Fasta,
InstaNovo(Option<InstaNovoVersion>),
MaxQuant(Option<MaxQuantVersion>),
MetaMorpheus(Option<MetaMorpheusVersion>),
/// Carries no version
MZTab,
NovoB(Option<NovoBVersion>),
Novor(Option<NovorVersion>),
Opair(Option<OpairVersion>),
Peaks(Option<PeaksVersion>),
PepNet(Option<PepNetVersion>),
PiHelixNovo(Option<PiHelixNovoVersion>),
PiPrimeNovo(Option<PiPrimeNovoVersion>),
PLGS(Option<PLGSVersion>),
PLink(Option<PLinkVersion>),
PowerNovo(Option<PowerNovoVersion>),
Proteoscape(Option<ProteoscapeVersion>),
PUniFind(Option<PUniFindVersion>),
Sage(Option<SageVersion>),
MSFragger(Option<MSFraggerVersion>),
SpectrumSequenceList(Option<SpectrumSequenceListVersion>),
/// No format selected; `open` hands the file to `open_identified_peptidoforms_file` instead of
/// a specific parser. This is the `Default` value.
#[default]
Undefined,
}
impl FileFormat {
pub fn open<'a>(
self,
path: &Path,
ontologies: &'a Ontologies,
) -> Result<GeneralIdentifiedPeptidoforms<'a>, BoxedError<'static, BasicKind>> {
match self {
#[cfg(feature = "mzannotate")]
Self::AnnotatedSpectrum => annotated_spectrum::parse_mzspeclib(path, ontologies),
Self::BasicCSV(version) => BasicCSVData::parse_file(path, ontologies, false, version)
.map(IdentifiedPeptidoformIter::into_box),
Self::DeepNovoFamily(version) => {
DeepNovoFamilyData::parse_file(path, ontologies, false, version)
.map(IdentifiedPeptidoformIter::into_box)
}
Self::Fasta => FastaData::parse_file(path).map(|sequences| {
let b: Box<
dyn Iterator<
Item = Result<
IdentifiedPeptidoform<Linked, MaybePeptidoform>,
BoxedError<'static, BasicKind>,
>,
>,
> = Box::new(sequences.into_iter().map(|p| {
Ok(IdentifiedPeptidoform::<SemiAmbiguous, PeptidoformPresent>::from(p).cast())
}));
b
}),
Self::InstaNovo(version) => InstaNovoData::parse_file(path, ontologies, false, version)
.map(IdentifiedPeptidoformIter::into_box),
Self::MaxQuant(version) => MaxQuantData::parse_file(path, ontologies, false, version)
.map(IdentifiedPeptidoformIter::into_box),
Self::MetaMorpheus(version) => {
MetaMorpheusData::parse_file(path, ontologies, false, version)
.map(IdentifiedPeptidoformIter::into_box)
}
Self::MZTab => MZTabData::parse_file(path, ontologies).map(|sequences| {
let b: Box<
dyn Iterator<
Item = Result<
IdentifiedPeptidoform<Linked, MaybePeptidoform>,
BoxedError<'static, BasicKind>,
>,
>,
> = Box::new(sequences.into_iter().map(|p| {
p.map(|p| {
IdentifiedPeptidoform::<SimpleLinear, MaybePeptidoform>::from(p).cast()
})
}));
b
}),
Self::PiHelixNovo(version) => {
PiHelixNovoData::parse_file(path, ontologies, false, version)
.map(IdentifiedPeptidoformIter::into_box)
}
Self::PiPrimeNovo(version) => {
PiPrimeNovoData::parse_file(path, ontologies, false, version)
.map(IdentifiedPeptidoformIter::into_box)
}
Self::NovoB(version) => NovoBData::parse_file(path, ontologies, false, version)
.map(IdentifiedPeptidoformIter::into_box),
Self::Novor(version) => NovorData::parse_file(path, ontologies, false, version)
.map(IdentifiedPeptidoformIter::into_box),
Self::Opair(version) => OpairData::parse_file(path, ontologies, false, version)
.map(IdentifiedPeptidoformIter::into_box),
Self::PLGS(version) => PLGSData::parse_file(path, ontologies, false, version)
.map(IdentifiedPeptidoformIter::into_box),
Self::PLink(version) => PLinkData::parse_file(path, ontologies, false, version)
.map(IdentifiedPeptidoformIter::into_box),
Self::Peaks(version) => PeaksData::parse_file(path, ontologies, false, version)
.map(IdentifiedPeptidoformIter::into_box),
Self::PepNet(version) => PepNetData::parse_file(path, ontologies, false, version)
.map(IdentifiedPeptidoformIter::into_box),
Self::MSFragger(version) => MSFraggerData::parse_file(path, ontologies, false, version)
.map(IdentifiedPeptidoformIter::into_box),
Self::PowerNovo(version) => PowerNovoData::parse_file(path, ontologies, false, version)
.map(IdentifiedPeptidoformIter::into_box),
Self::Proteoscape(version) => {
ProteoscapeData::parse_file(path, ontologies, false, version)
.map(IdentifiedPeptidoformIter::into_box)
}
Self::PUniFind(version) => PUniFindData::parse_file(path, ontologies, false, version)
.map(IdentifiedPeptidoformIter::into_box),
Self::Sage(version) => SageData::parse_file(path, ontologies, false, version)
.map(IdentifiedPeptidoformIter::into_box),
Self::SpectrumSequenceList(version) => {
SpectrumSequenceListData::parse_file(path, ontologies, false, version)
.map(IdentifiedPeptidoformIter::into_box)
}
Self::Undefined => open_identified_peptidoforms_file(path, ontologies, false),
}
}
}