use std::{borrow::Cow, ops::Range, str::FromStr};
use context_error::*;
use mzcore::csv::CsvLine;
// Generates the boilerplate for one CSV based identified peptidoform format:
// * `<Format>Format`: the column names to use for one version of the format,
// * `<Format>Data`: the parsed data of a single line,
// * the `IdentifiedPeptidoformSource` implementation for `<Format>Data`,
// * the conversion from `<Format>Data` into `IdentifiedPeptidoform`.
macro_rules! format_family {
// Macro input, in order: doc attributes for the data type, the format name, the complexity and
// peptidoform availability marker types, the list of known format versions, the CSV separator
// and header arguments handed to the CSV parser, the required columns (name: type, parser), the
// optional columns (optionally feature gated), and an optional extra item (`$post_process`).
($(#[doc = $ddoc:expr])*
$format:ident,
$complexity:ident, $peptidoform_availability:ident, $versions:expr, $separator:expr, $header:expr;
required { $($(#[doc = $rdoc:expr])? $rname:ident: $rtyp:ty, $rf:expr;)* }
optional { $($(#[doc = $odoc:expr])? $(#[cfg(feature = $ocfg:literal)])? $oname:ident: $otyp:ty, $of:expr;)*}
$($post_process:item)?) => {paste::paste!{
#[allow(unused_imports)] use context_error::*;
use super::super::common_parser::HasLocation;
// The format description: one column name per required column and one `OptionalColumn` per
// optional column, together with the version this description belongs to.
#[doc = "The type to contain the format description for " $format " files."]
#[non_exhaustive]
#[derive(Clone, Debug, Default, Deserialize, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize)]
pub struct [<$format Format>] {
$($rname: &'static str,)*
$($(#[cfg(feature = $ocfg)])? $oname: crate::common_parser::OptionalColumn,)*
version: [<$format Version>]
}
// The parsed line: required columns are stored directly, optional columns as `Option`.
#[doc = "The data for individual entries in " $format " files."]
$(#[doc = $ddoc])*
#[non_exhaustive]
#[derive(Clone, Debug, Default, Deserialize, PartialEq, Serialize)]
#[allow(missing_docs)]
pub struct [<$format Data>] {
$($(#[doc = $rdoc])? pub $rname: $rtyp,)*
$($(#[doc = $odoc])? $(#[cfg(feature = $ocfg)])? pub $oname: Option<$otyp>,)*
pub version: [<$format Version>],
// All (header, value) pairs of the original line, only stored when `keep_all_columns` was set.
columns: Option<Vec<(std::sync::Arc<String>, String)>>,
}
impl IdentifiedPeptidoformSource for [<$format Data>] {
type Source = CsvLine;
type Format = [<$format Format>];
type Complexity = $complexity;
type PeptidoformAvailability = $peptidoform_availability;
type Version = [<$format Version>];
// Try every known version in the order given by `$versions` and return the first one that
// parses. If none parses, a single error is returned that carries the error of every attempted
// version (each tagged with that version) as underlying errors.
fn parse(source: &Self::Source, ontologies: &mzcore::ontology::Ontologies, keep_all_columns: bool) -> Result<(Self, &'static Self::Format), BoxedError<'static, BasicKind>> {
let mut errors = Vec::new();
for format in $versions {
match Self::parse_specific(source, format, ontologies, keep_all_columns) {
Ok(peptide) => return Ok((peptide, format)),
Err(err) => errors.push(err.version(format.version.to_string())),
}
}
Err(BoxedError::new(BasicKind::Error,
format!("Invalid {} line", stringify!($format)),
"The correct format could not be determined automatically",
source.full_context().to_owned(),
).add_underlying_errors(errors))
}
// Open the file at `path` as CSV and parse its lines. When `version` is given its format is
// passed on to `parse_many`. The first item is peeked so that an error on the first line is
// returned directly from this function instead of from the iterator.
fn parse_file(
path: impl AsRef<std::path::Path>,
ontologies: &mzcore::ontology::Ontologies,
keep_all_columns: bool,
version: Option<Self::Version>,
) -> Result<BoxedIdentifiedPeptideIter<'_, Self>, BoxedError<'static, BasicKind>> {
let format = version.map(|v| v.format());
parse_csv(path, $separator, $header).and_then(|lines| {
let mut i = Self::parse_many::<Box<dyn Iterator<Item = Result<Self::Source, BoxedError<'_, BasicKind>>>>>(
Box::new(lines), ontologies, keep_all_columns, format);
if let Some(Err(e)) = i.peek() {
Err(e.clone())
} else {
Ok(i)
}
})
}
// Same as `parse_file` but reads the CSV data from an arbitrary reader.
fn parse_reader<'a>(
reader: impl std::io::Read + 'a,
ontologies: &'a mzcore::ontology::Ontologies,
keep_all_columns: bool,
version: Option<Self::Version>,
) -> Result<BoxedIdentifiedPeptideIter<'a, Self>, BoxedError<'static, BasicKind>> {
let format = version.map(|v| v.format());
mzcore::csv::parse_csv_raw(reader, $separator, $header, None).and_then(move |lines| {
let mut i = Self::parse_many::<Box<dyn Iterator<Item = Result<Self::Source, BoxedError<'_, BasicKind>>>>>(
Box::new(lines), ontologies, keep_all_columns, format);
if let Some(Err(e)) = i.peek() {
Err(e.clone())
} else {
Ok(i)
}
})
}
// Parse a line with one specific format description. Every required column must be present;
// optional columns are opened according to their `OptionalColumn` setting.
#[allow(clippy::redundant_closure_call)] fn parse_specific(source: &Self::Source, format: &[<$format Format>], ontologies: &mzcore::ontology::Ontologies, keep_all_columns: bool) -> Result<Self, BoxedError<'static, BasicKind>> {
#[allow(unused_imports)]
use crate::helper_functions::InvertResult;
// Local `Ok` that shadows the prelude one with the error type fixed to this function's error
// type. NOTE(review): presumably so the `$rf`/`$of` parser expressions can use a bare `Ok(..)`
// without error type annotations; confirm against the call sites of this macro.
#[allow(non_snake_case, dead_code, clippy::missing_errors_doc)]
const fn Ok<T>(value: T) -> Result<T, BoxedError<'static, BasicKind>> {
Result::Ok(value)
}
let parsed = Self {
// Required: look up the column and run the column parser, any failure aborts this format.
$($rname: $rf(source.column(format.$rname).map_err(BoxedError::to_owned)?, ontologies)?,)*
// Optional: open the column (may yield `None`) and only run the column parser on `Some`.
$($(#[cfg(feature = $ocfg)])? $oname: format.$oname.open_column(source).and_then(|l: Option<Location>| l.map(|value: Location| $of(value, ontologies)).invert()).map_err(BoxedError::to_owned)?,)*
version: format.version.clone(),
// Only copy all raw columns when requested.
columns: keep_all_columns.then(|| source.values().map(|(h, v)| (h, v.to_string())).collect()),
};
// `post_process` is always called; when `$post_process` is not supplied to the macro it has to
// come from elsewhere (presumably a default on the trait; TODO confirm).
Self::post_process(source, parsed, ontologies)
}
$($post_process)?
}
impl [<$format Data>] {
// The (header, value) pairs of the original CSV line, `None` if they were not kept at parse time.
pub fn full_csv_line(&self) -> Option<&[(std::sync::Arc<String>, String)]> {
self.columns.as_deref()
}
}
// Wrap the format specific data into the general `IdentifiedPeptidoform`, copying out the score
// and local confidence before the data is moved into the wrapper.
impl From<[<$format Data>]> for IdentifiedPeptidoform<$complexity, $peptidoform_availability> {
fn from(value: [<$format Data>]) -> Self {
Self {
score: value.confidence(),
local_confidence: value.local_confidence().map(|v| v.to_vec()),
data: IdentifiedPeptidoformData::$format(value),
complexity_marker: PhantomData,
peptidoform_availability_marker: PhantomData,
}
}
}
}};
}
/// How a column that is not guaranteed to exist in every format version should be looked up.
///
/// The behaviour for each variant is implemented in `OptionalColumn::open_column`.
#[derive(
Copy,
Clone,
PartialEq,
Eq,
PartialOrd,
Ord,
Hash,
Debug,
Default,
serde::Serialize,
serde::Deserialize,
)]
pub(super) enum OptionalColumn {
/// The column is not part of this format version, it is never looked up (the default).
#[default]
NotAvailable,
/// The column with this name is used if present, a failed lookup is silently ignored.
Optional(&'static str),
/// The column with this name has to be present, a failed lookup is an error.
Required(&'static str),
}
impl OptionalColumn {
    /// Look up this column in the given line.
    ///
    /// * `NotAvailable` never touches the line and gives `Ok(None)`.
    /// * `Optional` gives the location if the lookup succeeds and `Ok(None)` otherwise.
    /// * `Required` gives the location if the lookup succeeds.
    ///
    /// # Errors
    /// Only for `Required` columns, when the column lookup on the line fails.
    pub(super) fn open_column(
        self,
        source: &CsvLine,
    ) -> Result<Option<Location<'_>>, BoxedError<'static, BasicKind>> {
        match self {
            Self::NotAvailable => Ok(None),
            // A failed lookup is not an error for optional columns, so the error is dropped
            Self::Optional(name) => Ok(source.column(name).ok()),
            Self::Required(name) => {
                let location = source.column(name)?;
                Ok(Some(location))
            }
        }
    }
}
/// Something from which a named column can be retrieved as a [`Location`].
pub(super) trait HasLocation {
/// Get the location of the column with the given name.
///
/// The returned location borrows both `self` and `name`.
///
/// # Errors
/// When the column cannot be retrieved.
fn column<'a>(&'a self, name: &'a str) -> Result<Location<'a>, BoxedError<'static, BasicKind>>;
}
impl HasLocation for CsvLine {
    /// Look up the column via `index_column` and wrap its range in a [`Location`] that remembers
    /// the column name.
    ///
    /// # Errors
    /// When `index_column` fails, the error is converted to an owned error and returned.
    fn column<'a>(&'a self, name: &'a str) -> Result<Location<'a>, BoxedError<'static, BasicKind>> {
        let lookup = self.index_column(name).map_err(BoxedError::to_owned);
        match lookup {
            // The value itself is not needed, only where it is located in the line
            Ok((_, range)) => Ok(Location {
                line: self,
                location: range.clone(),
                column: Some(name),
            }),
            Err(error) => Err(error),
        }
    }
}
/// A view on a byte range of a single [`CsvLine`], optionally labelled with a column name.
#[derive(Clone)]
pub(super) struct Location<'a> {
/// The CSV line this location points into.
pub(super) line: &'a CsvLine,
/// The byte range into `line.line()` that is selected by this location.
pub(super) location: Range<usize>,
/// The name of the column if known, used as the highlight label when building an error context.
pub(super) column: Option<&'a str>,
}
/// Helpers to inspect and narrow down the selected range of a [`Location`].
///
/// All ranges are byte ranges into the full text of the line.
impl Location<'_> {
    /// The length in bytes of the selected range.
    pub(super) fn len(&self) -> usize {
        self.location.len()
    }

    /// Whether the selected range is empty.
    pub(super) fn is_empty(&self) -> bool {
        self.location.is_empty()
    }

    /// Split the selected text on every occurrence of `sep` and give a location for each part.
    ///
    /// The separators themselves are not part of any returned location. As with `str::split`,
    /// an empty text gives a single empty location.
    pub(super) fn array(self, sep: char) -> std::vec::IntoIter<Self> {
        // `str::split` removes the separator, so every part starts one separator further along.
        // The separator width is its UTF-8 length, which is not 1 for non ASCII separators.
        let sep_len = sep.len_utf8();
        let mut offset = 0;
        let mut output = Vec::new();
        for part in self.as_str().split(sep) {
            let start = self.location.start + offset;
            output.push(Location {
                line: self.line,
                location: start..start + part.len(),
                column: self.column,
            });
            offset += part.len() + sep_len;
        }
        output.into_iter()
    }

    /// Give `None` if the selected text is empty or exactly `-`, otherwise give the location back.
    pub(super) fn or_empty(self) -> Option<Self> {
        let text = self.as_str();
        if text.is_empty() || text == "-" {
            None
        } else {
            Some(self)
        }
    }

    /// Give `None` if the selected text is exactly `pattern`, otherwise give the location back.
    pub(super) fn ignore(self, pattern: &str) -> Option<Self> {
        let text = self.as_str();
        if text == pattern { None } else { Some(self) }
    }

    /// Drop the first `bytes` bytes from the selected range.
    ///
    /// The new start is clamped to the end of the range, so skipping more bytes than available
    /// gives an empty location at the end.
    pub(super) fn skip(self, bytes: usize) -> Self {
        Self {
            line: self.line,
            location: self
                .location
                .start
                .saturating_add(bytes)
                .min(self.location.end)..self.location.end,
            column: self.column,
        }
    }

    /// Remove all repeated occurrences of `pattern` from the end of the selected text.
    pub(super) fn trim_end_matches(mut self, pattern: &str) -> Self {
        let trimmed = self.as_str().trim_end_matches(pattern);
        let dif = self.location.len() - trimmed.len();
        self.location = self.location.start..self.location.end - dif;
        self
    }

    /// Remove all repeated occurrences of `pattern` from the start of the selected text.
    pub(super) fn trim_start_matches(mut self, pattern: &str) -> Self {
        let trimmed = self.as_str().trim_start_matches(pattern);
        let dif = self.location.len() - trimmed.len();
        self.location = self.location.start + dif..self.location.end;
        self
    }

    /// Copy the selected text into a new `String`.
    pub(super) fn get_string(self) -> String {
        self.as_str().to_string()
    }

    /// Copy the selected text into a new `Box<str>`.
    pub(super) fn get_boxed_str(self) -> Box<str> {
        self.as_str().into()
    }

    /// The selected text.
    pub(super) fn as_str(&self) -> &str {
        &self.line.line()[self.location.clone()]
    }

    /// The full text of the line this location points into.
    pub(super) fn full_line(&self) -> &str {
        self.line.line()
    }
}
/// Parsing, error context, and splitting helpers for a [`Location`].
impl<'a> Location<'a> {
    /// Parse the selected text, with surrounding whitespace removed, using `FromStr`.
    ///
    /// # Errors
    /// When parsing fails an error is built with `base_error.0` and `base_error.1` as its two
    /// text fields and the selected range of the line as context. The original parse error is
    /// discarded.
    pub(super) fn parse<T: FromStr>(
        self,
        base_error: (&'static str, &'static str),
    ) -> Result<T, BoxedError<'static, BasicKind>> {
        self.as_str().trim().parse().map_err(|_| {
            BoxedError::new(
                BasicKind::Error,
                base_error.0,
                base_error.1,
                self.line
                    .range_context(self.location, self.column.map(Cow::Borrowed))
                    .to_owned(),
            )
        })
    }

    /// Apply the given parser to this location, this allows using parsers in a method chain.
    ///
    /// # Errors
    /// Whatever error `f` returns.
    pub(super) fn parse_with<T>(
        self,
        f: impl Fn(Self) -> Result<T, BoxedError<'static, BasicKind>>,
    ) -> Result<T, BoxedError<'static, BasicKind>> {
        f(self)
    }

    /// Build an error context showing the full line with the selected range highlighted.
    ///
    /// The column name, if known, is attached to the highlight and the file of the line, if
    /// known, is set as the source.
    pub(super) fn context(&'a self) -> Context<'a> {
        let base = Context::none()
            // NOTE(review): this cast truncates if the line index does not fit in a `u32`
            .line_index(self.line.line_index as u32)
            .lines(0, self.full_line());
        let base = if let Some(comment) = self.column {
            base.add_highlight((0, self.location.clone(), comment))
        } else {
            base.add_highlight((0, self.location.clone()))
        };
        if let Some(source) = &self.line.file {
            base.source(source.as_ref().as_ref())
        } else {
            base
        }
    }

    /// Remove leading and trailing whitespace from the selected range.
    ///
    /// Text consisting of only whitespace gives an empty location at the original start.
    pub(super) fn trim(&self) -> Self {
        let text = self.as_str();
        let leading = text.len() - text.trim_start().len();
        let trailing = text.len() - text.trim_end().len();
        // For whitespace-only text both counts span the whole text, so the trimmed ends would
        // cross each other. That is the only case where the counts sum to more than the length.
        let location = if leading + trailing > text.len() {
            self.location.start..self.location.start
        } else {
            self.location.start + leading..self.location.end - trailing
        };
        Self {
            line: self.line,
            location,
            column: self.column,
        }
    }

    /// Split the selected text at the first occurrence of `p`.
    ///
    /// Gives the locations before and after the delimiter, or `None` if `p` does not occur.
    pub(super) fn split_once(self, p: char) -> Option<(Self, Self)> {
        self.as_str().split_once(p).map(|(start, end)| {
            (
                Self {
                    line: self.line,
                    location: self.location.start..self.location.start + start.len(),
                    column: self.column,
                },
                Self {
                    line: self.line,
                    location: self.location.end - end.len()..self.location.end,
                    column: self.column,
                },
            )
        })
    }

    /// Split the selected text into three parts on the delimiter `p`.
    ///
    /// The first split is at the first occurrence of `p`, the second split is at the last
    /// occurrence of `p` in the remainder, so any further occurrences end up in the middle
    /// part. Gives `None` if `p` occurs fewer than two times.
    pub(super) fn split_twice(self, p: char) -> Option<(Self, Self, Self)> {
        let (start, after) = self.as_str().split_once(p)?;
        let (middle, end) = after.rsplit_once(p)?;
        // The middle part starts right after the first delimiter
        let start_middle = self.location.start + start.len() + p.len_utf8();
        Some((
            Self {
                line: self.line,
                location: self.location.start..self.location.start + start.len(),
                column: self.column,
            },
            Self {
                line: self.line,
                location: start_middle..start_middle + middle.len(),
                column: self.column,
            },
            Self {
                line: self.line,
                location: self.location.end - end.len()..self.location.end,
                column: self.column,
            },
        ))
    }
}
/// Parsing and splitting helpers for a location that might be absent.
pub(super) trait OptionalLocation<'a> {
/// Parse the location using `FromStr` if it is present.
///
/// # Errors
/// When parsing fails, `base_error` supplies the two text fields of the error.
fn parse<T: FromStr>(
self,
base_error: (&'static str, &'static str),
) -> Result<Option<T>, BoxedError<'static, BasicKind>>;
/// Apply the given parser to the location if it is present.
///
/// # Errors
/// Whatever error `f` returns.
fn parse_with<T>(
self,
f: impl Fn(Location<'a>) -> Result<T, BoxedError<'static, BasicKind>>,
) -> Result<Option<T>, BoxedError<'static, BasicKind>>;
/// Get the text of the location as an owned string if it is present.
fn get_string(self) -> Option<String>;
/// The iterator over the separate parts as returned by `array` and `optional_array`.
type ArrayIter: Iterator<Item = Location<'a>>;
/// Split the location on the separator, always giving an iterator.
fn array(self, sep: char) -> Self::ArrayIter;
/// Split the location on the separator, giving an `Option` around the iterator.
fn optional_array(self, sep: char) -> Option<Self::ArrayIter>;
/// Ignore the location based on the given pattern, giving `None` if it is ignored.
fn ignore(self, pattern: &str) -> Option<Location<'a>>;
}
/// For an optional location every operation is forwarded to the location if it is present, a
/// missing location results in the respective empty value.
impl<'a> OptionalLocation<'a> for Option<Location<'a>> {
    type ArrayIter = std::vec::IntoIter<Location<'a>>;

    /// Parse the location if present, a missing location gives `Ok(None)`.
    fn parse<T: FromStr>(
        self,
        base_error: (&'static str, &'static str),
    ) -> Result<Option<T>, BoxedError<'static, BasicKind>> {
        match self {
            Some(location) => location.parse::<T>(base_error).map(Some),
            None => Ok(None),
        }
    }

    /// Run the parser on the location if present, a missing location gives `Ok(None)`.
    fn parse_with<T>(
        self,
        f: impl Fn(Location<'a>) -> Result<T, BoxedError<'static, BasicKind>>,
    ) -> Result<Option<T>, BoxedError<'static, BasicKind>> {
        match self {
            Some(location) => f(location).map(Some),
            None => Ok(None),
        }
    }

    /// The text of the location if present.
    fn get_string(self) -> Option<String> {
        let location = self?;
        Some(location.get_string())
    }

    /// Split the location on the separator, a missing location gives an empty iterator.
    fn array(self, sep: char) -> Self::ArrayIter {
        match self {
            Some(location) => location.array(sep),
            None => Default::default(),
        }
    }

    /// Split the location on the separator, a missing location gives `None`.
    fn optional_array(self, sep: char) -> Option<Self::ArrayIter> {
        let location = self?;
        Some(location.array(sep))
    }

    /// Give `None` if the location is missing or if its text is exactly `pattern`.
    fn ignore(self, pattern: &str) -> Self {
        let location = self?;
        location.ignore(pattern)
    }
}