read_structure/
read_segment.rs1use std::{convert::TryFrom, io::Read};
9
10use crate::{segment_type::SegmentType, ReadStructure, ReadStructureError};
11
/// The marker used in place of a numeric length for a segment without a fixed length, as a
/// single byte.
pub const ANY_LENGTH_BYTE: u8 = b'+';

/// The same marker as [`ANY_LENGTH_BYTE`], as a one-element byte slice.
pub const ANY_LENGTH_BYTE_SLICE: &[u8] = &[ANY_LENGTH_BYTE];

/// The same marker as [`ANY_LENGTH_BYTE`], as a string slice.
pub const ANY_LENGTH_STR: &str = "+";
20
21#[derive(Debug, Copy, Clone, PartialEq)]
24#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
25pub struct ReadSegment {
26 pub(crate) offset: usize,
28 pub length: Option<usize>,
30 pub kind: SegmentType,
32}
33
34impl ReadSegment {
35 pub fn extract_bases<'a, B>(&self, bases: &'a [B]) -> Result<&'a [B], ReadStructureError> {
41 let end = self.calculate_end(bases)?;
42 Ok(&bases[self.offset..end])
43 }
44
45 pub fn extract_bases_and_quals<'a, B, Q>(
52 &self,
53 bases: &'a [B],
54 quals: &'a [Q],
55 ) -> Result<(&'a [B], &'a [Q]), ReadStructureError> {
56 if bases.len() != quals.len() {
57 return Err(ReadStructureError::MismatchingBasesAndQualsLen {
58 bases_len: bases.len(),
59 quals_len: quals.len(),
60 });
61 }
62 let end = self.calculate_end(bases)?;
63 Ok((&bases[self.offset..end], &quals[self.offset..end]))
64 }
65
66 pub fn length(&self) -> Option<usize> {
68 self.length
69 }
70
71 pub fn has_length(&self) -> bool {
73 self.length.is_some()
74 }
75
76 #[inline]
82 fn calculate_end<T>(&self, bases: &[T]) -> Result<usize, ReadStructureError> {
83 if bases.len() < self.offset {
84 return Err(ReadStructureError::ReadEndsBeforeSegment(*self));
85 }
86 if let Some(l) = self.length {
87 if bases.len() < self.offset + l {
88 return Err(ReadStructureError::ReadEndsAfterSegment(*self));
89 }
90 Ok(self.offset + l)
91 } else {
92 Ok(bases.len())
93 }
94 }
95
96 fn clone_with_new_end(&self, end: usize) -> Self {
100 let option_new_length = if self.offset >= end { None } else { Some(end - self.offset) };
101 if option_new_length == self.length {
102 *self
103 } else {
104 Self { offset: self.offset, length: option_new_length, kind: self.kind }
105 }
106 }
107}
108
109impl std::str::FromStr for ReadSegment {
110 type Err = ReadStructureError;
111
112 fn from_str(s: &str) -> Result<Self, Self::Err> {
121 let rs = ReadStructure::from_str(s)?;
122 if rs.number_of_segments() == 1 {
123 Ok(rs.first().copied().unwrap())
125 } else {
126 Err(ReadStructureError::ReadSegmentMultipleSegments(s.to_owned()))
127 }
128 }
129}
130
131impl std::fmt::Display for ReadSegment {
132 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
134 match self.length {
135 Some(l) => write!(f, "{}", l),
136 None => write!(f, "{}", ANY_LENGTH_STR),
137 }?;
138 write!(f, "{}", self.kind.value())
139 }
140}
141
#[cfg(test)]
mod test {
    use super::{ReadSegment, ANY_LENGTH_STR};
    use crate::segment_type::SegmentType;
    use bstr::B;
    use std::str::FromStr;
    use strum::IntoEnumIterator;

    /// Builds a segment directly from its parts.
    fn segment(offset: usize, length: Option<usize>, kind: SegmentType) -> ReadSegment {
        ReadSegment { offset, length, kind }
    }

    #[test]
    fn test_read_segment_length() {
        let fixed = segment(0, Some(10), SegmentType::Template);
        assert_eq!(fixed.length(), Some(10));
        assert!(fixed.has_length());

        let open_ended = segment(0, None, SegmentType::Template);
        assert!(!open_ended.has_length());
    }

    #[test]
    #[should_panic]
    fn test_read_segment_fixed_length_panic() {
        // Unwrapping the length of a segment without a fixed length must panic.
        let open_ended = segment(0, None, SegmentType::Template);
        open_ended.length().unwrap();
    }

    #[test]
    fn test_read_segment_to_string() {
        for kind in SegmentType::iter() {
            let fixed = segment(0, Some(10), kind);
            assert_eq!(fixed.to_string(), format!("10{}", kind.value()));

            let open_ended = segment(0, None, kind);
            assert_eq!(open_ended.to_string(), format!("{}{}", ANY_LENGTH_STR, kind.value()));
        }
    }

    #[test]
    fn test_read_segment_clone_with_new_end() {
        // (new end, expected length) for a segment starting at offset 2. The outcome does not
        // depend on whether the source segment had a fixed length.
        let cases: [(usize, Option<usize>); 4] = [(10, Some(8)), (8, Some(6)), (2, None), (1, None)];
        for &original_length in &[Some(10), None] {
            let seg = segment(2, original_length, SegmentType::Template);
            for &(end, expected) in &cases {
                assert_eq!(seg.clone_with_new_end(end).length(), expected);
            }
        }
    }

    #[test]
    fn test_extract_bases() {
        let seg = segment(2, Some(3), SegmentType::MolecularBarcode);
        let bases = seg.extract_bases(B("GATTACA")).unwrap();
        assert_eq!(bases, b"TTA");
    }

    #[test]
    fn test_extract_bases_and_quals() {
        let seg = segment(2, Some(3), SegmentType::MolecularBarcode);
        let (bases, quals) = seg.extract_bases_and_quals(B("GATTACA"), B("1234567")).unwrap();
        assert_eq!(bases, B("TTA"));
        assert_eq!(quals, B("345"));
    }

    #[test]
    fn test_read_segment_from_str() {
        let open_template = ReadSegment::from_str("+T").unwrap();
        assert_eq!(open_template, segment(0, None, SegmentType::Template));

        let fixed_skip = ReadSegment::from_str("10S").unwrap();
        assert_eq!(fixed_skip, segment(0, Some(10), SegmentType::Skip));
    }
}