use serde::Deserialize;
use std::path::PathBuf;
use super::{
bed::{valid_bedlike, Bed4Addition, Bed4Iterator},
tsv::build_tsv_reader,
utils::get_base_extension,
Bed3Iterator, Bed5Addition, Bed5Iterator, BedlikeIterator,
};
use crate::{
ranges::{GenomicRangeRecord, GenomicRangeRecordEmpty},
GRangesError,
};
/// A parsing iterator for a genomic ranges file, tagged by the detected format.
///
/// The data-carrying variants are built by [`GenomicRangesFile::parsing_iterator`],
/// which wraps the iterator type matching the variant returned by
/// [`GenomicRangesFile::detect`].
#[derive(Debug)]
pub enum GenomicRangesParser {
/// Iterator over a file detected as BED3.
Bed3(Bed3Iterator),
/// Iterator over a file detected as BED4.
Bed4(Bed4Iterator),
/// Iterator over a file detected as BED5.
Bed5(Bed5Iterator),
/// Iterator over a file detected as BED-like.
Bedlike(BedlikeIterator),
/// No supported format.
///
/// NOTE(review): [`GenomicRangesFile::parsing_iterator`] never returns this
/// variant; it reports unsupported files as an error instead.
Unsupported,
}
/// The detected format of a genomic ranges file.
///
/// Each data-carrying variant stores the path of the file that was inspected,
/// as produced by [`GenomicRangesFile::detect`] and [`detect_bed_variant`].
#[derive(Debug, PartialEq)]
pub enum GenomicRangesFile {
/// The first record deserialized as a BED3 record.
Bed3(PathBuf),
/// The first record deserialized as a BED4 record.
Bed4(PathBuf),
/// The first record deserialized as a BED5 record.
Bed5(PathBuf),
/// The file passed the `valid_bedlike` check but was not classified as
/// BED3, BED4 or BED5 (or it has a `tsv` extension).
Bedlike(PathBuf),
/// The file matched none of the formats above.
Unsupported,
}
/// Determines which plain BED layout the file at `filepath` matches.
///
/// The first record of the file is trial-deserialized against each layout in
/// the order BED5, BED4, BED3, and the first layout that succeeds wins.
///
/// # Returns
///
/// * `Ok(Some(_))` with the matching [`GenomicRangesFile`] variant, which
///   carries the file path.
/// * `Ok(None)` when the first record fits none of the three layouts.
///
/// # Errors
///
/// Propagates any error from `try_deserialize`: a failure to build the TSV
/// reader, or `GRangesError::EmptyFile` when the file yields no records.
pub fn detect_bed_variant(
    filepath: impl Into<PathBuf>,
) -> Result<Option<GenomicRangesFile>, GRangesError> {
    let path: PathBuf = filepath.into();

    // The trial order matters: the first layout that parses is reported.
    if try_deserialize::<GenomicRangeRecord<Bed5Addition>>(&path)? {
        return Ok(Some(GenomicRangesFile::Bed5(path)));
    }
    if try_deserialize::<GenomicRangeRecord<Bed4Addition>>(&path)? {
        return Ok(Some(GenomicRangesFile::Bed4(path)));
    }
    if try_deserialize::<GenomicRangeRecordEmpty>(&path)? {
        return Ok(Some(GenomicRangesFile::Bed3(path)));
    }

    Ok(None)
}
/// Reports whether the first record of the file at `filepath` deserializes
/// into `T`.
///
/// Only the first record produced by the TSV reader is examined.
///
/// # Returns
///
/// `Ok(true)` if the first record deserializes into `T`, `Ok(false)` if
/// deserializing it fails.
///
/// # Errors
///
/// * Any error returned by `build_tsv_reader`.
/// * `GRangesError::EmptyFile` (carrying the lossily-converted path) when the
///   reader yields no records at all.
fn try_deserialize<T: for<'de> Deserialize<'de> + std::fmt::Debug>(
    filepath: impl Into<PathBuf>,
) -> Result<bool, GRangesError> {
    let path: PathBuf = filepath.into();
    let mut records = build_tsv_reader(&path)?.into_deserialize::<T>();

    match records.next() {
        // A deserialization failure is an answer ("not this layout"), not an error.
        Some(first_record) => Ok(first_record.is_ok()),
        None => Err(GRangesError::EmptyFile(
            path.to_string_lossy().to_string(),
        )),
    }
}
impl GenomicRangesFile {
    /// Detects the genomic ranges format of the file at `filepath`.
    ///
    /// Detection proceeds in this order:
    ///
    /// 1. If the base extension ends with `"tsv"` and the file passes
    ///    `valid_bedlike`, it is reported as [`GenomicRangesFile::Bedlike`].
    /// 2. Otherwise, if [`detect_bed_variant`] recognizes the file, that
    ///    variant (BED5, BED4 or BED3) is returned.
    /// 3. Otherwise, if the file passes `valid_bedlike`, it is reported as
    ///    [`GenomicRangesFile::Bedlike`].
    /// 4. Otherwise [`GenomicRangesFile::Unsupported`] is returned.
    ///
    /// # Errors
    ///
    /// * Any error returned by `valid_bedlike`, which runs before the
    ///   extension is inspected.
    /// * `GRangesError::CouldNotDetectRangesFiletype` when
    ///   `get_base_extension` returns `None`.
    /// * Any error returned by [`detect_bed_variant`].
    pub fn detect(filepath: impl Into<PathBuf>) -> Result<Self, GRangesError> {
        let filepath: PathBuf = filepath.into();
        let is_valid_bedlike = valid_bedlike(&filepath)?;
        let extension =
            get_base_extension(&filepath).ok_or(GRangesError::CouldNotDetectRangesFiletype)?;

        // A `tsv` extension takes precedence over the stricter BED layouts.
        if extension.ends_with("tsv") && is_valid_bedlike {
            return Ok(GenomicRangesFile::Bedlike(filepath));
        }

        if let Some(bed_filetype) = detect_bed_variant(&filepath)? {
            return Ok(bed_filetype);
        }

        // Not a plain BED3/4/5 file; fall back to BED-like if it qualified.
        if is_valid_bedlike {
            return Ok(GenomicRangesFile::Bedlike(filepath));
        }

        Ok(GenomicRangesFile::Unsupported)
    }

    /// Detects the format of the file at `filepath` and opens the matching
    /// parsing iterator.
    ///
    /// The argument only needs to convert into a `PathBuf`; it is never
    /// cloned, so no `Clone` bound is required.
    ///
    /// # Errors
    ///
    /// * Any error returned by [`GenomicRangesFile::detect`].
    /// * Any error returned by the selected iterator's `new` constructor.
    /// * `GRangesError::UnsupportedGenomicRangesFileFormat` when detection
    ///   yields [`GenomicRangesFile::Unsupported`].
    pub fn parsing_iterator(
        filepath: impl Into<PathBuf>,
    ) -> Result<GenomicRangesParser, GRangesError> {
        let path: PathBuf = filepath.into();
        match Self::detect(path)? {
            GenomicRangesFile::Bed3(path) => {
                Ok(GenomicRangesParser::Bed3(Bed3Iterator::new(path)?))
            }
            GenomicRangesFile::Bed4(path) => {
                Ok(GenomicRangesParser::Bed4(Bed4Iterator::new(path)?))
            }
            GenomicRangesFile::Bed5(path) => {
                Ok(GenomicRangesParser::Bed5(Bed5Iterator::new(path)?))
            }
            GenomicRangesFile::Bedlike(path) => {
                Ok(GenomicRangesParser::Bedlike(BedlikeIterator::new(path)?))
            }
            GenomicRangesFile::Unsupported => Err(GRangesError::UnsupportedGenomicRangesFileFormat),
        }
    }
}
#[cfg(test)]
mod tests {
    use super::GenomicRangesFile;

    /// Checks that `GenomicRangesFile::detect` reports the expected variant
    /// for each fixture file under `tests_data/`.
    #[test]
    fn test_rangefiletype_detect() {
        // Fixture expected to be detected as BED3.
        let bed3 = GenomicRangesFile::detect("tests_data/example.bed").unwrap();
        assert!(matches!(bed3, GenomicRangesFile::Bed3(_)));

        // Fixture with a `.tsv` extension, expected to be detected as BED-like.
        let bedlike = GenomicRangesFile::detect("tests_data/example_bedlike.tsv").unwrap();
        assert!(matches!(bedlike, GenomicRangesFile::Bedlike(_)));

        // Fixture expected to be detected as BED5.
        let bed5 = GenomicRangesFile::detect("tests_data/test_case_03.bed").unwrap();
        assert!(matches!(bed5, GenomicRangesFile::Bed5(_)));
    }
}