1use crate::read_segment;
9use crate::read_segment::ReadSegment;
10use crate::read_segment::ANY_LENGTH_BYTE;
11use crate::segment_type::SegmentType;
12use crate::ErrorMessageParts;
13use crate::ReadStructureError;
14use std::convert::TryFrom;
15use std::ops::Index;
16use std::string;
17use std::string::ToString;
18
/// An ordered collection of [`ReadSegment`]s describing how a read is laid out.
///
/// Instances built through `ReadStructure::new` contain at least one segment, and only
/// the final segment may lack a length.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct ReadStructure {
    /// The segments of this read structure, in order.
    elements: Vec<ReadSegment>,
    /// Sum of the lengths of every segment that has a length; computed in `new`.
    length_of_fixed_segments: usize,
}
28
29impl ReadStructure {
30 #[allow(clippy::missing_panics_doc)]
44 pub fn new(mut segments: Vec<ReadSegment>) -> Result<Self, ReadStructureError> {
45 if segments.is_empty() {
46 return Err(ReadStructureError::ReadStructureContainsZeroElements);
47 }
48
49 let mut num_indefinite = 0;
50 let mut length_of_fixed_segments = 0;
51 for s in &segments {
52 if let Some(len) = s.length {
53 length_of_fixed_segments += len;
54 } else {
55 num_indefinite += 1;
56 }
57 }
58
59 if segments.last().unwrap().has_length() {
60 if num_indefinite != 0 {
61 return Err(
62 ReadStructureError::ReadStructureNonTerminalIndefiniteLengthReadSegment(
63 *segments.iter().find(|s| !s.has_length()).unwrap(),
64 ),
65 );
66 }
67 } else if num_indefinite > 1 {
68 return Err(ReadStructureError::ReadStructureNonTerminalIndefiniteLengthReadSegment(
69 *segments.iter().find(|s| !s.has_length()).unwrap(),
70 ));
71 }
72
73 let mut off: usize = 0;
74 for segment in &mut segments {
75 segment.offset = off;
76 off += segment.length.unwrap_or(0);
77 }
78 Ok(ReadStructure { elements: segments, length_of_fixed_segments })
79 }
80
81 pub fn has_fixed_length(&self) -> bool {
84 self.elements.last().unwrap().has_length()
85 }
86
87 pub fn fixed_length(&self) -> Option<usize> {
89 if self.has_fixed_length() {
90 Some(self.length_of_fixed_segments)
91 } else {
92 None
93 }
94 }
95
96 pub fn number_of_segments(&self) -> usize {
98 self.elements.len()
99 }
100
101 pub fn segments(&self) -> &[ReadSegment] {
103 &self.elements
104 }
105
106 pub fn iter(&self) -> impl Iterator<Item = &ReadSegment> {
108 self.elements.iter()
109 }
110
111 pub fn segments_by_type(&self, kind: SegmentType) -> impl Iterator<Item = &ReadSegment> {
113 self.elements.iter().filter(move |seg| seg.kind == kind)
114 }
115
116 pub fn templates(&self) -> impl Iterator<Item = &ReadSegment> {
118 self.segments_by_type(SegmentType::Template)
119 }
120
121 pub fn sample_barcodes(&self) -> impl Iterator<Item = &ReadSegment> {
123 self.segments_by_type(SegmentType::SampleBarcode)
124 }
125
126 pub fn molecular_barcodes(&self) -> impl Iterator<Item = &ReadSegment> {
128 self.segments_by_type(SegmentType::MolecularBarcode)
129 }
130
131 pub fn skips(&self) -> impl Iterator<Item = &ReadSegment> {
133 self.segments_by_type(SegmentType::Skip)
134 }
135
136 pub fn cellular_barcodes(&self) -> impl Iterator<Item = &ReadSegment> {
138 self.segments_by_type(SegmentType::CellularBarcode)
139 }
140
141 pub fn first(&self) -> Option<&ReadSegment> {
143 self.elements.first()
144 }
145
146 pub fn last(&self) -> Option<&ReadSegment> {
148 self.elements.last()
149 }
150}
151
152impl IntoIterator for ReadStructure {
153 type Item = ReadSegment;
154
155 type IntoIter = std::vec::IntoIter<Self::Item>;
156
157 fn into_iter(self) -> Self::IntoIter {
158 self.elements.into_iter()
159 }
160}
161
162impl Index<usize> for ReadStructure {
163 type Output = ReadSegment;
164
165 fn index(&self, idx: usize) -> &Self::Output {
167 &self.elements[idx]
168 }
169}
170
171impl std::fmt::Display for ReadStructure {
172 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
174 for e in &self.elements {
175 write!(f, "{}", e)?;
176 }
177 Ok(())
178 }
179}
180
181impl std::str::FromStr for ReadStructure {
182 type Err = ReadStructureError;
183
184 fn from_str(rs: &str) -> Result<Self, Self::Err> {
186 let mut offset = 0;
187 let mut i = 0;
188 let mut segs: Vec<ReadSegment> = Vec::new();
189 let chars: Vec<char> = rs.to_uppercase().chars().filter(|c| !c.is_whitespace()).collect();
190 while i < chars.len() {
191 let parse_i = i;
193
194 let length = if chars[i] as u8 == ANY_LENGTH_BYTE {
196 i += 1;
197 None
198 } else if chars[i].is_digit(10) {
199 let mut len: usize = 0;
200 while i < chars.len() && chars[i].is_digit(10) {
201 let digit = chars[i].to_digit(10).unwrap() as usize;
203 len = (len * 10) + digit;
204 i += 1;
205 }
206 Some(len)
207 } else {
208 return Err(ReadStructureError::ReadStructureMissingLengthInformation(
209 ErrorMessageParts::new(&chars, parse_i, parse_i + 1),
210 ));
211 };
212
213 if chars.len() == i {
215 return Err(ReadStructureError::ReadStructureMissingOperator(
216 ErrorMessageParts::new(&chars, parse_i, i),
217 ));
218 } else if let Ok(kind) = SegmentType::try_from(chars[i]) {
219 if length.map_or(false, |l| l == 0) {
220 return Err(ReadStructureError::ReadSegmentLengthZero(ErrorMessageParts::new(
221 &chars, parse_i, i,
222 )));
223 }
224 i += 1;
225 segs.push(ReadSegment { offset, length, kind });
226 offset += length.unwrap_or(0);
227 } else {
228 return Err(ReadStructureError::ReadStructureHadUnknownType(
229 ErrorMessageParts::new(&chars, parse_i, i + 1),
230 ));
231 }
232 }
233
234 ReadStructure::new(segs)
235 }
236}
237
238impl TryFrom<&[ReadSegment]> for ReadStructure {
239 type Error = ReadStructureError;
240 fn try_from(elements: &[ReadSegment]) -> Result<Self, Self::Error> {
242 Self::new(elements.to_vec())
243 }
244}
245
#[cfg(test)]
mod test {
    use crate::read_structure::ReadStructure;
    use std::str::FromStr;

    /// Parsing a valid read structure and formatting it again reproduces the input.
    #[test]
    fn test_read_structure_from_str() {
        let inputs =
            ["1T", "1B", "1M", "1S", "101T", "5B101T", "123456789T", "10T10B10B10S10M", "5B2C3T"];
        for input in inputs.iter() {
            let parsed = ReadStructure::from_str(input).unwrap();
            assert_eq!(parsed.to_string(), *input);
        }
    }

    /// Whitespace anywhere in the input is ignored by the parser.
    #[test]
    fn test_read_structure_from_str_with_whitespace() {
        let inputs = ["75T 8B 8B 75T", " 75T 8B 8B\t75T  "];
        for input in inputs.iter() {
            let parsed = ReadStructure::from_str(input).unwrap();
            assert_eq!(parsed.to_string(), "75T8B8B75T");
        }
    }

    /// The any-length character is accepted when used once, on the last segment.
    #[test]
    fn test_read_structure_allow_anylength_char_only_once_and_for_last_segment() {
        let with_prefix = ReadStructure::from_str("5M+T").unwrap();
        assert_eq!(with_prefix.to_string(), "5M+T");

        let alone = ReadStructure::from_str("+M").unwrap();
        assert_eq!(alone.to_string(), "+M");
    }

    // Generates one test per `name: input` pair asserting that parsing `input` fails.
    macro_rules! test_read_structure_from_str_err {
        ($($test_name:ident: $input:expr,)*) => {
            $(
                #[test]
                fn $test_name() {
                    let outcome = ReadStructure::from_str($input);
                    assert!(outcome.is_err());
                }
            )*
        }
    }

    test_read_structure_from_str_err! {
        test_read_structure_allow_any_char_only_once_and_for_last_segment_panic_0: "++M",
        test_read_structure_allow_any_char_only_once_and_for_last_segment_panic_1: "5M++T",
        test_read_structure_allow_any_char_only_once_and_for_last_segment_panic_2: "5M70+T",
        test_read_structure_allow_any_char_only_once_and_for_last_segment_panic_3: "+M+T",
        test_read_structure_allow_any_char_only_once_and_for_last_segment_panic_4: "+M70T",
    }

    // Generates one test per `name: (input, suffix)` pair asserting that parsing `input`
    // fails with an error message ending in `suffix`.
    macro_rules! test_read_structure_from_str_invalid {
        ($($test_name:ident: $case:expr,)*) => {
            $(
                #[test]
                fn $test_name() {
                    let (input, expected_suffix) = $case;
                    let message = ReadStructure::from_str(input).unwrap_err().to_string();
                    assert!(message.ends_with(expected_suffix));
                }
            )*
        }
    }

    test_read_structure_from_str_invalid! {
        test_read_structure_from_str_invalid_0: ("9R", "[9R]"),
        test_read_structure_from_str_invalid_1: ("T", "[T]"),
        test_read_structure_from_str_invalid_2: ("23TT", "23T[T]"),
        test_read_structure_from_str_invalid_3: ("23T2", "23T[2]"),
        test_read_structure_from_str_invalid_4: ("23T2TT23T", "23T2T[T]23T"),
    }

    /// Each per-kind accessor yields only the segments of that kind, in order.
    #[test]
    fn test_read_structure_collect_segments() {
        let rs = ReadStructure::from_str("10M9T8B7S3C10M9T8B7S2C").unwrap();

        let templates = rs.templates().map(ToString::to_string).collect::<String>();
        assert_eq!(templates, "9T9T");

        let sample_barcodes = rs.sample_barcodes().map(ToString::to_string).collect::<String>();
        assert_eq!(sample_barcodes, "8B8B");

        let molecular_barcodes =
            rs.molecular_barcodes().map(ToString::to_string).collect::<String>();
        assert_eq!(molecular_barcodes, "10M10M");

        let skips = rs.skips().map(ToString::to_string).collect::<String>();
        assert_eq!(skips, "7S7S");

        let cellular_barcodes =
            rs.cellular_barcodes().map(ToString::to_string).collect::<String>();
        assert_eq!(cellular_barcodes, "3C2C");
    }

    // Generates one test per `name: (input, count)` pair asserting the number of segments
    // parsed from `input`.
    macro_rules! test_read_structure_length {
        ($($test_name:ident: $case:expr,)*) => {
            $(
                #[test]
                fn $test_name() {
                    let (input, expected) = $case;
                    let parsed = ReadStructure::from_str(input).unwrap();
                    assert_eq!(parsed.number_of_segments(), expected);
                }
            )*
        }
    }

    test_read_structure_length! {
        test_read_structure_length_0: ("1T", 1),
        test_read_structure_length_1: ("1B", 1),
        test_read_structure_length_2: ("1M", 1),
        test_read_structure_length_3: ("1S", 1),
        test_read_structure_length_4: ("101T", 1),
        test_read_structure_length_5: ("5B101T", 2),
        test_read_structure_length_6: ("123456789T", 1),
        test_read_structure_length_7: ("10T10B10B10S10M", 5),
    }

    // Generates one test per `name: (input, index, text, offset)` tuple asserting the text
    // form and offset of the segment at `index`.
    macro_rules! test_read_structure_index {
        ($($test_name:ident: $case:expr,)*) => {
            $(
                #[test]
                fn $test_name() {
                    let (string, index, exp_string, exp_offset) = $case;
                    let parsed = ReadStructure::from_str(string).unwrap();
                    let segment = &parsed[index];
                    assert_eq!(segment.to_string(), exp_string);
                    assert_eq!(segment.offset, exp_offset);
                }
            )*
        }
    }

    test_read_structure_index! {
        test_read_structure_index_0: ("1T", 0, "1T", 0),
        test_read_structure_index_1: ("1B", 0, "1B", 0),
        test_read_structure_index_2: ("1M", 0, "1M", 0),
        test_read_structure_index_3: ("1S", 0, "1S", 0),
        test_read_structure_index_4: ("101T", 0, "101T", 0),
        test_read_structure_index_5: ("5B101T", 0, "5B", 0),
        test_read_structure_index_6: ("5B101T", 1, "101T", 5),
        test_read_structure_index_7: ("123456789T", 0, "123456789T", 0),
        test_read_structure_index_8: ("10T10B10B10S10M", 0, "10T", 0),
        test_read_structure_index_9: ("10T10B10B10S10M", 1, "10B", 10),
        test_read_structure_index_10: ("10T10B10B10S10M", 2, "10B", 20),
        test_read_structure_index_11: ("10T10B10B10S10M", 3, "10S", 30),
        test_read_structure_index_12: ("10T10B10B10S10M", 4, "10M", 40),
        test_read_structure_index_32: ("10T10B10B10S10C10M", 4, "10C", 40),
    }

    /// A read structure survives a JSON serialize/deserialize round trip unchanged.
    #[test]
    #[cfg(feature = "serde")]
    fn test_serde() {
        let original = ReadStructure::from_str("10T10B10B10S10M").unwrap();
        let json = serde_json::to_string(&original).unwrap();
        let restored: ReadStructure = serde_json::from_str(&json).unwrap();
        assert_eq!(original, restored);
    }
}