use std::num::ParseIntError;
use omics::coordinate::Contig;
use omics::coordinate::Strand;
use omics::coordinate::interbase::Coordinate;
use omics::coordinate::interval;
use omics::coordinate::interval::interbase::Interval;
use omics::coordinate::position::Number;
use omics::coordinate::strand;
use thiserror::Error;
use crate::alignment::section::header::DELIMITER;
#[derive(Debug, Error)]
pub enum ParseError {
#[error("invalid chromosome size: {0}")]
InvalidChromosomeSize(ParseIntError),
#[error("invalid strand: {0}")]
InvalidStrand(strand::Error),
#[error("invalid alignment start: {0}")]
InvalidAlignmentStart(ParseIntError),
#[error("invalid alignment end: {0}")]
InvalidAlignmentEnd(ParseIntError),
}
#[derive(Debug, Error)]
pub enum Error {
#[error("interval error: {0}")]
Interval(interval::Error),
#[error("parse error: {0}")]
Parse(ParseError),
#[error("start position greater than end position")]
StartPositionGreaterThanEndPosition,
}
type Result<T> = std::result::Result<T, Error>;
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Sequence {
chromosome_name: Contig,
chromosome_size: Number,
strand: Strand,
alignment_start: Number,
alignment_end: Number,
}
impl Sequence {
pub fn try_from_str_parts(
chromosome_name: &str,
chromosome_size: &str,
strand: &str,
alignment_start: &str,
alignment_end: &str,
) -> Result<Self> {
let chromosome_name = Contig::new_unchecked(chromosome_name);
let chromosome_size = chromosome_size
.parse()
.map_err(|err| Error::Parse(ParseError::InvalidChromosomeSize(err)))?;
let strand = strand
.parse()
.map_err(|err| Error::Parse(ParseError::InvalidStrand(err)))?;
let alignment_start = alignment_start
.parse()
.map_err(|err| Error::Parse(ParseError::InvalidAlignmentStart(err)))?;
let alignment_end = alignment_end
.parse()
.map_err(|err| Error::Parse(ParseError::InvalidAlignmentEnd(err)))?;
if alignment_start > alignment_end {
return Err(Error::StartPositionGreaterThanEndPosition);
}
Ok(Self {
chromosome_name,
chromosome_size,
strand,
alignment_start,
alignment_end,
})
}
pub fn chromosome_name(&self) -> &str {
self.chromosome_name.as_ref()
}
pub fn chromosome_size(&self) -> Number {
self.chromosome_size
}
pub fn strand(&self) -> Strand {
self.strand
}
pub fn alignment_start(&self) -> Number {
self.alignment_start
}
pub fn alignment_end(&self) -> Number {
self.alignment_end
}
pub fn interval(&self) -> Result<Interval> {
let (start_pos, end_pos) = match self.strand {
Strand::Positive => (
self.alignment_start() as Number,
self.alignment_end() as Number,
),
Strand::Negative => (
self.chromosome_size - self.alignment_start() as Number,
self.chromosome_size - self.alignment_end() as Number,
),
};
let contig = Contig::new_unchecked(self.chromosome_name());
let start = Coordinate::new(contig.clone(), self.strand(), start_pos);
let end = Coordinate::new(contig, self.strand(), end_pos);
Interval::try_new(start, end).map_err(Error::Interval)
}
}
impl std::fmt::Display for Sequence {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
f,
"{}{}{}{}{}{}{}{}{}",
self.chromosome_name.as_str(),
DELIMITER,
self.chromosome_size,
DELIMITER,
self.strand,
DELIMITER,
self.alignment_start,
DELIMITER,
self.alignment_end
)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn try_new() {
let sequence = Sequence::try_from_str_parts("seq0", "2", "+", "0", "2").unwrap();
assert_eq!(sequence.chromosome_name(), "seq0");
assert_eq!(sequence.chromosome_size(), 2);
assert_eq!(sequence.strand(), Strand::Positive);
assert_eq!(sequence.alignment_start(), 0);
assert_eq!(sequence.alignment_end(), 2);
}
#[test]
fn invalid_chromosome_size() {
let err = Sequence::try_from_str_parts("seq0", "A", "+", "0", "2").unwrap_err();
assert!(matches!(
err,
Error::Parse(ParseError::InvalidChromosomeSize(_))
));
assert_eq!(
err.to_string(),
"parse error: invalid chromosome size: invalid digit found in string"
);
}
#[test]
fn invalid_strand() {
let err = Sequence::try_from_str_parts("seq0", "2", "?", "0", "2").unwrap_err();
assert!(matches!(err, Error::Parse(ParseError::InvalidStrand(_))));
assert_eq!(
err.to_string(),
"parse error: invalid strand: parse error: invalid strand: ?"
);
}
#[test]
fn invalid_alignment_start() {
let err = Sequence::try_from_str_parts("seq0", "2", "+", "?", "2").unwrap_err();
assert!(matches!(
err,
Error::Parse(ParseError::InvalidAlignmentStart(_))
));
assert_eq!(
err.to_string(),
"parse error: invalid alignment start: invalid digit found in string"
);
}
#[test]
fn invalid_alignment_end() {
let err = Sequence::try_from_str_parts("seq0", "2", "+", "0", "?").unwrap_err();
assert!(matches!(
err,
Error::Parse(ParseError::InvalidAlignmentEnd(_))
));
assert_eq!(
err.to_string(),
"parse error: invalid alignment end: invalid digit found in string"
);
}
#[test]
fn display() {
let sequence = Sequence::try_from_str_parts("seq0", "2", "+", "0", "1").unwrap();
assert_eq!(sequence.to_string(), "seq0 2 + 0 1");
}
}