Skip to main content

read_structure/
read_segment.rs

1//! Read Segments
2//!
3//! Type [`ReadSegment`] encapsulates all the information about a segment within
4//! a [`crate::read_structure::ReadStructure`]. A segment can either have a definite length, in which case
5//! length must be `Some(usize)`, or an indefinite length (can be any length, 1 or more)
6//! in which case length must be `None`.
7
8use crate::{ReadStructure, ReadStructureError, segment_type::SegmentType};
9
/// Byte form of the any-length marker: it can be put in place of a number in a read
/// structure to mean "1 or more bases".
pub const ANY_LENGTH_BYTE: u8 = b'+';

/// Slice form of [`ANY_LENGTH_BYTE`], defined for efficiency. It always contains exactly
/// that single byte.
pub const ANY_LENGTH_BYTE_SLICE: &[u8] = &[ANY_LENGTH_BYTE];

/// String form of the any-length marker: it can be put in place of a number in a read
/// structure to mean "1 or more bases".
pub const ANY_LENGTH_STR: &str = "+";
18
19/// A single segment of a read structure: a segment type and an optional fixed length.
20///
21/// Segments are typically obtained by parsing a [`ReadStructure`]; the positions of the
22/// segment's bases within a read are tracked by the enclosing [`ReadStructure`], not
23/// here.
24#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
25pub struct ReadSegment {
26    /// The optional length of this segment. `None` means this is the indefinite-length
27    /// (`+`) segment; its concrete span is resolved by the enclosing [`ReadStructure`]
28    /// at extract time (it runs from just after the preceding segments up to just
29    /// before the following segments, so in `8B+M10T` the `+M` segment covers
30    /// everything between byte 8 and `read_len - 10`). At most one segment per read
31    /// structure may be indefinite.
32    pub length: Option<usize>,
33    /// The segment type.
34    pub kind: SegmentType,
35}
36
37impl ReadSegment {
38    /// Returns the length of the read segment, or `None` for an indefinite-length (`+`) segment.
39    pub fn length(&self) -> Option<usize> {
40        self.length
41    }
42
43    /// Returns true if the read segment has a defined length (i.e. is not `+`).
44    pub fn has_length(&self) -> bool {
45        self.length.is_some()
46    }
47}
48
49impl std::str::FromStr for ReadSegment {
50    type Err = ReadStructureError;
51
52    /// Builds a [`ReadSegment`] from a string representation.  The character representation
53    /// of [`SegmentType`] must be the last character, while the leading character(s) either
54    /// a non-zero integer, or the any-length character.
55    ///
56    /// # Errors
57    ///
58    /// Returns `Err` if the string was too short, if the length could not be parsed, or if
59    /// the segment type could not be recognized.
60    fn from_str(s: &str) -> Result<Self, Self::Err> {
61        let rs = ReadStructure::from_str(s)?;
62        if rs.number_of_segments() == 1 {
63            // Unwrap is safe since we checked the length
64            Ok(rs.first().copied().unwrap())
65        } else {
66            Err(ReadStructureError::ReadSegmentMultipleSegments(s.to_owned()))
67        }
68    }
69}
70
71impl std::fmt::Display for ReadSegment {
72    /// Formats the [`ReadSegment`] as a string.
73    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
74        match self.length {
75            Some(l) => write!(f, "{}", l),
76            None => write!(f, "{}", ANY_LENGTH_STR),
77        }?;
78        write!(f, "{}", self.kind.value())
79    }
80}
81
#[cfg(test)]
mod test {
    use crate::read_segment::ANY_LENGTH_STR;
    use crate::read_segment::ReadSegment;
    use crate::segment_type::SegmentType;
    use std::str::FromStr;
    use strum::IntoEnumIterator;

    /// `length()` and `has_length()` must agree for both fixed and indefinite segments.
    #[test]
    fn test_read_segment_length() {
        let seg_fixed_length = ReadSegment { length: Some(10), kind: SegmentType::Template };
        assert_eq!(seg_fixed_length.length(), Some(10));
        assert!(seg_fixed_length.has_length());
        let seg_no_length = ReadSegment { length: None, kind: SegmentType::Template };
        // Assert the `None` directly rather than only via a panic on `unwrap`.
        assert_eq!(seg_no_length.length(), None);
        assert!(!seg_no_length.has_length());
    }

    /// Unwrapping the length of an indefinite segment panics, since the length is `None`.
    #[test]
    #[should_panic]
    fn test_read_segment_fixed_length_panic() {
        let seg_no_length = ReadSegment { length: None, kind: SegmentType::Template };
        seg_no_length.length().unwrap();
    }

    /// Every segment type formats as `<length or any-length marker><type value>`.
    #[test]
    fn test_read_segment_to_string() {
        for tpe in SegmentType::iter() {
            let seg_fixed_length = ReadSegment { length: Some(10), kind: tpe };
            assert_eq!(seg_fixed_length.to_string(), format!("10{}", tpe.value()));
            let seg_no_length = ReadSegment { length: None, kind: tpe };
            assert_eq!(seg_no_length.to_string(), format!("{}{}", ANY_LENGTH_STR, tpe.value()));
        }
    }

    /// A single well-formed segment parses to the matching length and type.
    #[test]
    fn test_read_segment_from_str() {
        assert_eq!(
            ReadSegment::from_str("+T").unwrap(),
            ReadSegment { length: None, kind: SegmentType::Template }
        );
        assert_eq!(
            ReadSegment::from_str("10S").unwrap(),
            ReadSegment { length: Some(10), kind: SegmentType::Skip }
        );
    }

    /// Input that does not describe exactly one segment must be rejected, whether the
    /// failure comes from parsing the read structure or from the single-segment check.
    #[test]
    fn test_read_segment_from_str_rejects_non_single_segment() {
        assert!(ReadSegment::from_str("10T10B").is_err());
        assert!(ReadSegment::from_str("").is_err());
    }
}