read_structure/
read_segment.rs

1//! Read Segments
2//!
3//! Type [`ReadSegment`] encapsulates all the information about a segment within
4//! a [`crate::read_structure::ReadStructure`]. A segment can either have a definite length, in which case
5//! length must be `Some(usize)`, or an indefinite length (can be any length, 1 or more)
6//! in which case length must be `None`.
7
8use std::{convert::TryFrom, io::Read};
9
10use crate::{segment_type::SegmentType, ReadStructure, ReadStructureError};
11
/// A character that can be put in place of a number in a read structure to mean "1 or more bases".
///
/// This is the single-byte (`u8`) form of the marker, `+`.
pub const ANY_LENGTH_BYTE: u8 = b'+';

/// Defined for efficiency, same as [`ANY_LENGTH_BYTE`].
///
/// This is the same `+` marker expressed as a one-element byte slice.
pub const ANY_LENGTH_BYTE_SLICE: &[u8] = b"+";

/// A string that can be put in place of a number in a read structure to mean "1 or more bases".
///
/// This is the `&str` form of the marker; it is what the `Display` implementation of
/// [`ReadSegment`] writes for a segment whose length is `None`.
pub const ANY_LENGTH_STR: &str = "+";
20
/// The read segment describing a given kind ([`SegmentType`]), optional length, and offset of the
/// bases within a [`crate::read_structure::ReadStructure`].
///
/// The type is `Copy`; segments are copied by value, including into the error values returned
/// when an extraction fails. `Serialize`/`Deserialize` are derived only when the crate's
/// `serde` feature is enabled.
#[derive(Debug, Copy, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct ReadSegment {
    /// The offset in the read if the segment belongs to a read structure.
    /// It is used as the start index when bases are extracted for this segment.
    pub(crate) offset: usize,
    /// The optional length of this segment. `None` means the segment has no fixed length;
    /// when extracting, such a segment runs from `offset` to the end of the read.
    pub length: Option<usize>,
    /// The segment type
    pub kind: SegmentType,
}
33
34impl ReadSegment {
35    /// Extract the bases corresponding to this [`ReadSegment`] from a slice.
36    ///
37    /// # Errors
38    ///
39    /// - If the segment does not fall wholely within the slice.
40    pub fn extract_bases<'a, B>(&self, bases: &'a [B]) -> Result<&'a [B], ReadStructureError> {
41        let end = self.calculate_end(bases)?;
42        Ok(&bases[self.offset..end])
43    }
44
45    /// Extract the bases and corresponding quals to this [`ReadSegment`] from a slice.
46    ///
47    /// # Errors
48    ///
49    /// - If the segment does not fall wholely within the slice.
50    /// - If the bases and quals lengths are not equal.
51    pub fn extract_bases_and_quals<'a, B, Q>(
52        &self,
53        bases: &'a [B],
54        quals: &'a [Q],
55    ) -> Result<(&'a [B], &'a [Q]), ReadStructureError> {
56        if bases.len() != quals.len() {
57            return Err(ReadStructureError::MismatchingBasesAndQualsLen {
58                bases_len: bases.len(),
59                quals_len: quals.len(),
60            });
61        }
62        let end = self.calculate_end(bases)?;
63        Ok((&bases[self.offset..end], &quals[self.offset..end]))
64    }
65
66    /// Returns the length of the read segment.
67    pub fn length(&self) -> Option<usize> {
68        self.length
69    }
70
71    /// Returns true if the read segment has a length defined (i.e. not `None`)
72    pub fn has_length(&self) -> bool {
73        self.length.is_some()
74    }
75
76    /// Returns the end position for the segment for the given read.
77    ///
78    /// # Errors
79    ///
80    /// Errors if the read ends before the segment starts.
81    #[inline]
82    fn calculate_end<T>(&self, bases: &[T]) -> Result<usize, ReadStructureError> {
83        if bases.len() < self.offset {
84            return Err(ReadStructureError::ReadEndsBeforeSegment(*self));
85        }
86        if let Some(l) = self.length {
87            if bases.len() < self.offset + l {
88                return Err(ReadStructureError::ReadEndsAfterSegment(*self));
89            }
90            Ok(self.offset + l)
91        } else {
92            Ok(bases.len())
93        }
94    }
95
96    /// Clone the read segment but with an updated end. If the new end is before
97    /// the current offset, the read segment will have no length defined.
98    /// Otherwise, the new length will be reduced based on the offset (`end - offset`).
99    fn clone_with_new_end(&self, end: usize) -> Self {
100        let option_new_length = if self.offset >= end { None } else { Some(end - self.offset) };
101        if option_new_length == self.length {
102            *self
103        } else {
104            Self { offset: self.offset, length: option_new_length, kind: self.kind }
105        }
106    }
107}
108
109impl std::str::FromStr for ReadSegment {
110    type Err = ReadStructureError;
111
112    /// Builds a [`ReadSegment`] from a string representation.  The character representation
113    /// of [`SegmentType`] must be the last character, while the leading character(s) either
114    /// a non-zero integer, or the any-length character.
115    ///
116    /// # Errors
117    ///
118    /// Returns `Err` if the string was too short, if the length could not be parsed, or if
119    /// the segment type could not be recognized.
120    fn from_str(s: &str) -> Result<Self, Self::Err> {
121        let rs = ReadStructure::from_str(s)?;
122        if rs.number_of_segments() == 1 {
123            // Unwrap is safe since we checked the length
124            Ok(rs.first().copied().unwrap())
125        } else {
126            Err(ReadStructureError::ReadSegmentMultipleSegments(s.to_owned()))
127        }
128    }
129}
130
131impl std::fmt::Display for ReadSegment {
132    /// Formats the [`ReadSegment`] as a string.
133    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
134        match self.length {
135            Some(l) => write!(f, "{}", l),
136            None => write!(f, "{}", ANY_LENGTH_STR),
137        }?;
138        write!(f, "{}", self.kind.value())
139    }
140}
141
#[cfg(test)]
mod test {
    //! Unit tests for [`ReadSegment`]: accessors, formatting, parsing, and base/qual extraction
    //! (including the error paths).

    use crate::read_segment::ReadSegment;
    use crate::read_segment::ANY_LENGTH_STR;
    use crate::segment_type::SegmentType;
    use crate::ReadStructureError;
    use bstr::B;
    use std::str::FromStr;
    use strum::IntoEnumIterator;

    #[test]
    fn test_read_segment_length() {
        let seg_fixed_length =
            ReadSegment { offset: 0, length: Some(10), kind: SegmentType::Template };
        assert!(seg_fixed_length.has_length());
        assert_eq!(seg_fixed_length.length().unwrap(), 10);
        let seg_no_length = ReadSegment { offset: 0, length: None, kind: SegmentType::Template };
        assert!(!seg_no_length.has_length());
    }

    #[test]
    #[should_panic]
    fn test_read_segment_fixed_length_panic() {
        // Unwrapping the length of a segment without one must panic.
        let seg_no_length = ReadSegment { offset: 0, length: None, kind: SegmentType::Template };
        seg_no_length.length().unwrap();
    }

    #[test]
    fn test_read_segment_to_string() {
        for tpe in SegmentType::iter() {
            let seg_fixed_length = ReadSegment { offset: 0, length: Some(10), kind: tpe };
            assert_eq!(seg_fixed_length.to_string(), format!("10{}", tpe.value()));
            let seg_no_length = ReadSegment { offset: 0, length: None, kind: tpe };
            assert_eq!(seg_no_length.to_string(), format!("{}{}", ANY_LENGTH_STR, tpe.value()));
        }
    }

    #[test]
    fn test_read_segment_clone_with_new_end() {
        let seg_fixed_length =
            ReadSegment { offset: 2, length: Some(10), kind: SegmentType::Template };
        assert_eq!(seg_fixed_length.clone_with_new_end(10).length().unwrap(), 8);
        assert_eq!(seg_fixed_length.clone_with_new_end(8).length().unwrap(), 6);
        assert_eq!(seg_fixed_length.clone_with_new_end(2).length(), None);
        assert_eq!(seg_fixed_length.clone_with_new_end(1).length(), None);
        let seg_no_length = ReadSegment { offset: 2, length: None, kind: SegmentType::Template };
        assert_eq!(seg_no_length.clone_with_new_end(10).length().unwrap(), 8);
        assert_eq!(seg_no_length.clone_with_new_end(8).length().unwrap(), 6);
        assert_eq!(seg_no_length.clone_with_new_end(2).length(), None);
        assert_eq!(seg_no_length.clone_with_new_end(1).length(), None);
    }

    #[test]
    fn test_extract_bases() {
        let seg = ReadSegment { offset: 2, length: Some(3), kind: SegmentType::MolecularBarcode };
        assert_eq!(seg.extract_bases(B("GATTACA")).unwrap(), b"TTA");
    }

    #[test]
    fn test_extract_bases_no_length() {
        // A segment without a length runs from its offset to the end of the read.
        let seg = ReadSegment { offset: 2, length: None, kind: SegmentType::Template };
        assert_eq!(seg.extract_bases(B("GATTACA")).unwrap(), b"TTACA");
    }

    #[test]
    fn test_extract_bases_read_too_short() {
        let seg = ReadSegment { offset: 2, length: Some(3), kind: SegmentType::MolecularBarcode };
        // The read ends before the segment's offset.
        assert!(matches!(
            seg.extract_bases(B("G")),
            Err(ReadStructureError::ReadEndsBeforeSegment(_))
        ));
        // The read reaches the offset but cannot hold the full segment length.
        assert!(matches!(
            seg.extract_bases(B("GATT")),
            Err(ReadStructureError::ReadEndsAfterSegment(_))
        ));
    }

    #[test]
    fn test_extract_bases_and_quals() {
        let seg = ReadSegment { offset: 2, length: Some(3), kind: SegmentType::MolecularBarcode };
        let sub = seg.extract_bases_and_quals(B("GATTACA"), B("1234567")).unwrap();
        assert_eq!(sub.0, B("TTA"));
        assert_eq!(sub.1, B("345"));
    }

    #[test]
    fn test_extract_bases_and_quals_mismatched_lengths() {
        let seg = ReadSegment { offset: 2, length: Some(3), kind: SegmentType::MolecularBarcode };
        assert!(matches!(
            seg.extract_bases_and_quals(B("GATTACA"), B("123")),
            Err(ReadStructureError::MismatchingBasesAndQualsLen { bases_len: 7, quals_len: 3 })
        ));
    }

    #[test]
    fn test_read_segment_from_str() {
        assert_eq!(
            ReadSegment::from_str("+T").unwrap(),
            ReadSegment { offset: 0, length: None, kind: SegmentType::Template }
        );
        assert_eq!(
            ReadSegment::from_str("10S").unwrap(),
            ReadSegment { offset: 0, length: Some(10), kind: SegmentType::Skip }
        );
    }
}