use std::{convert::TryFrom, io::Read};
use crate::{segment_type::SegmentType, ReadStructure, ReadStructureError};
/// The `+` marker as a single byte. `+` is the token written in place of a numeric length
/// for a segment whose `length` is `None` (see the `Display` impl of [`ReadSegment`]).
pub const ANY_LENGTH_BYTE: u8 = b'+';
/// The same `+` marker as [`ANY_LENGTH_BYTE`], as a one-element byte slice.
pub const ANY_LENGTH_BYTE_SLICE: &[u8] = b"+";
/// The same `+` marker as [`ANY_LENGTH_BYTE`], as a string slice. This is the form the
/// `Display` impl of [`ReadSegment`] writes when the segment has no fixed length.
pub const ANY_LENGTH_STR: &str = "+";
/// One segment of a read: a start offset, an optional length, and a segment kind.
///
/// The `Display` form is the length (or `+` when `length` is `None`) followed by the
/// value of `kind`.
#[derive(Debug, Copy, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct ReadSegment {
/// Index into the read at which this segment starts; used as the lower bound when slicing.
pub(crate) offset: usize,
/// Number of elements covered by the segment. `None` means the segment has no fixed
/// length and extends to the end of whatever read it is applied to.
pub length: Option<usize>,
/// The kind of this segment.
pub kind: SegmentType,
}
impl ReadSegment {
    /// Returns the sub-slice of `bases` covered by this segment.
    ///
    /// The slice starts at the segment's offset. It ends `length` elements later for a
    /// fixed-length segment, or at the end of `bases` when the segment has no fixed length.
    ///
    /// # Errors
    ///
    /// - [`ReadStructureError::ReadEndsBeforeSegment`] if `bases` is shorter than the
    ///   segment's offset.
    /// - [`ReadStructureError::ReadEndsAfterSegment`] if the segment has a fixed length and
    ///   `bases` is too short to contain all of it.
    pub fn extract_bases<'a, B>(&self, bases: &'a [B]) -> Result<&'a [B], ReadStructureError> {
        let end = self.calculate_end(bases)?;
        Ok(&bases[self.offset..end])
    }

    /// Returns the sub-slices of `bases` and `quals` covered by this segment.
    ///
    /// Both slices are cut with the same `offset..end` range, where `end` is determined
    /// from `bases` exactly as in [`ReadSegment::extract_bases`].
    ///
    /// # Errors
    ///
    /// - [`ReadStructureError::MismatchingBasesAndQualsLen`] if `bases` and `quals` differ
    ///   in length.
    /// - [`ReadStructureError::ReadEndsBeforeSegment`] if the read is shorter than the
    ///   segment's offset.
    /// - [`ReadStructureError::ReadEndsAfterSegment`] if the segment has a fixed length and
    ///   the read is too short to contain all of it.
    pub fn extract_bases_and_quals<'a, B, Q>(
        &self,
        bases: &'a [B],
        quals: &'a [Q],
    ) -> Result<(&'a [B], &'a [Q]), ReadStructureError> {
        if bases.len() != quals.len() {
            return Err(ReadStructureError::MismatchingBasesAndQualsLen {
                bases_len: bases.len(),
                quals_len: quals.len(),
            });
        }
        let end = self.calculate_end(bases)?;
        Ok((&bases[self.offset..end], &quals[self.offset..end]))
    }

    /// Returns the fixed length of this segment, or `None` if it has no fixed length.
    pub fn length(&self) -> Option<usize> {
        self.length
    }

    /// Returns `true` if this segment has a fixed length.
    pub fn has_length(&self) -> bool {
        self.length.is_some()
    }

    /// Computes the exclusive end index of this segment within `bases`.
    ///
    /// On success the returned index satisfies `offset <= end <= bases.len()`, so slicing
    /// `bases[offset..end]` cannot panic.
    #[inline]
    fn calculate_end<T>(&self, bases: &[T]) -> Result<usize, ReadStructureError> {
        let read_len = bases.len();
        if read_len < self.offset {
            return Err(ReadStructureError::ReadEndsBeforeSegment(*self));
        }
        match self.length {
            // No fixed length: the segment runs to the end of the read.
            None => Ok(read_len),
            Some(len) => match self.offset.checked_add(len) {
                Some(end) if end <= read_len => Ok(end),
                // Either the segment extends past the read, or `offset + len` does not fit
                // in a `usize` at all — in which case no slice could be long enough. A plain
                // `+` here would panic in debug builds and wrap around in release builds.
                _ => Err(ReadStructureError::ReadEndsAfterSegment(*self)),
            },
        }
    }

    /// Returns a copy of this segment whose length is adjusted so that it ends at `end`.
    ///
    /// If `end` is at or before the segment's offset the copy has no fixed length
    /// (`length` is `None`); otherwise its length is `end - offset`.
    fn clone_with_new_end(&self, end: usize) -> Self {
        // `checked_sub` is `None` when `end < offset`; the filter maps the zero-length
        // case (`end == offset`) to `None` as well.
        let length = end.checked_sub(self.offset).filter(|&len| len > 0);
        Self { length, ..*self }
    }
}
impl std::str::FromStr for ReadSegment {
    type Err = ReadStructureError;

    /// Parses `s` as a [`ReadStructure`] and returns its only segment.
    ///
    /// # Errors
    ///
    /// Propagates any error from `ReadStructure::from_str`, and returns
    /// [`ReadStructureError::ReadSegmentMultipleSegments`] (carrying a copy of `s`) when
    /// the parsed structure does not contain exactly one segment.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let parsed = ReadStructure::from_str(s)?;
        if parsed.number_of_segments() != 1 {
            return Err(ReadStructureError::ReadSegmentMultipleSegments(s.to_owned()));
        }
        // Exactly one segment was reported above, so `first()` is expected to be populated.
        Ok(parsed.first().copied().unwrap())
    }
}
impl std::fmt::Display for ReadSegment {
    /// Writes the segment as its length followed by the value of its kind, using
    /// [`ANY_LENGTH_STR`] in place of the number when the segment has no fixed length.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        if let Some(len) = self.length {
            write!(f, "{}", len)?;
        } else {
            f.write_str(ANY_LENGTH_STR)?;
        }
        write!(f, "{}", self.kind.value())
    }
}
#[cfg(test)]
mod test {
    // Unit tests for `ReadSegment`: accessors, `Display`, `FromStr`, end adjustment and
    // slice extraction (including the error paths).
    use crate::read_segment::ReadSegment;
    use crate::read_segment::ANY_LENGTH_STR;
    use crate::segment_type::SegmentType;
    use bstr::B;
    use std::str::FromStr;
    use strum::IntoEnumIterator;

    #[test]
    fn test_read_segment_length() {
        let seg_fixed_length =
            ReadSegment { offset: 0, length: Some(10), kind: SegmentType::Template };
        assert!(seg_fixed_length.has_length());
        assert_eq!(seg_fixed_length.length().unwrap(), 10);
        let seg_no_length = ReadSegment { offset: 0, length: None, kind: SegmentType::Template };
        assert!(!seg_no_length.has_length());
        assert_eq!(seg_no_length.length(), None);
    }

    // Unwrapping the length of a segment without a fixed length must panic.
    #[test]
    #[should_panic]
    fn test_read_segment_fixed_length_panic() {
        let seg_no_length = ReadSegment { offset: 0, length: None, kind: SegmentType::Template };
        seg_no_length.length().unwrap();
    }

    #[test]
    fn test_read_segment_to_string() {
        for tpe in SegmentType::iter() {
            let seg_fixed_length = ReadSegment { offset: 0, length: Some(10), kind: tpe };
            assert_eq!(seg_fixed_length.to_string(), format!("10{}", tpe.value()));
            let seg_no_length = ReadSegment { offset: 0, length: None, kind: tpe };
            assert_eq!(seg_no_length.to_string(), format!("{}{}", ANY_LENGTH_STR, tpe.value()));
        }
    }

    #[test]
    fn test_read_segment_clone_with_new_end() {
        let seg_fixed_length =
            ReadSegment { offset: 2, length: Some(10), kind: SegmentType::Template };
        assert_eq!(seg_fixed_length.clone_with_new_end(10).length().unwrap(), 8);
        assert_eq!(seg_fixed_length.clone_with_new_end(8).length().unwrap(), 6);
        assert_eq!(seg_fixed_length.clone_with_new_end(2).length(), None);
        assert_eq!(seg_fixed_length.clone_with_new_end(1).length(), None);
        let seg_no_length = ReadSegment { offset: 2, length: None, kind: SegmentType::Template };
        assert_eq!(seg_no_length.clone_with_new_end(10).length().unwrap(), 8);
        assert_eq!(seg_no_length.clone_with_new_end(8).length().unwrap(), 6);
        assert_eq!(seg_no_length.clone_with_new_end(2).length(), None);
        assert_eq!(seg_no_length.clone_with_new_end(1).length(), None);
    }

    #[test]
    fn test_extract_bases() {
        let seg = ReadSegment { offset: 2, length: Some(3), kind: SegmentType::MolecularBarcode };
        assert_eq!(seg.extract_bases(B("GATTACA")).unwrap(), b"TTA");
    }

    // A segment without a fixed length runs to the end of the read, and may be empty when
    // the read ends exactly at the segment's offset.
    #[test]
    fn test_extract_bases_no_length() {
        let seg = ReadSegment { offset: 2, length: None, kind: SegmentType::Template };
        assert_eq!(seg.extract_bases(B("GATTACA")).unwrap(), b"TTACA");
        let seg_at_end = ReadSegment { offset: 7, length: None, kind: SegmentType::Template };
        assert!(seg_at_end.extract_bases(B("GATTACA")).unwrap().is_empty());
    }

    #[test]
    fn test_extract_bases_read_too_short() {
        // Read ends before the segment even starts.
        let seg = ReadSegment { offset: 5, length: Some(1), kind: SegmentType::Template };
        assert!(seg.extract_bases(B("GAT")).is_err());
        // Read starts inside the segment but ends before the segment does.
        let seg = ReadSegment { offset: 2, length: Some(10), kind: SegmentType::Template };
        assert!(seg.extract_bases(B("GATTACA")).is_err());
    }

    #[test]
    fn test_extract_bases_and_quals() {
        let seg = ReadSegment { offset: 2, length: Some(3), kind: SegmentType::MolecularBarcode };
        let sub = seg.extract_bases_and_quals(B("GATTACA"), B("1234567")).unwrap();
        assert_eq!(sub.0, B("TTA"));
        assert_eq!(sub.1, B("345"));
    }

    #[test]
    fn test_extract_bases_and_quals_mismatched_lengths() {
        let seg = ReadSegment { offset: 0, length: None, kind: SegmentType::Template };
        assert!(seg.extract_bases_and_quals(B("GATTACA"), B("123")).is_err());
    }

    #[test]
    fn test_read_segment_from_str() {
        assert_eq!(
            ReadSegment::from_str("+T").unwrap(),
            ReadSegment { offset: 0, length: None, kind: SegmentType::Template }
        );
        assert_eq!(
            ReadSegment::from_str("10S").unwrap(),
            ReadSegment { offset: 0, length: Some(10), kind: SegmentType::Skip }
        );
    }
}