1use crate::ErrorMessageParts;
11use crate::ReadStructureError;
12use crate::read_segment::ANY_LENGTH_BYTE;
13use crate::read_segment::ReadSegment;
14use crate::segment_type::SegmentType;
15use std::iter::FusedIterator;
16use std::ops::Index;
17
/// Controls whether skip segments are yielded when a read is split into segments.
///
/// The `ExtractedSegments` iterator consults this value: with `Exclude`, segments whose
/// kind is `SegmentType::Skip` are passed over; with `Include`, every segment is yielded.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
pub enum SkipHandling {
    /// Yield skip segments together with all other segments.
    Include,
    /// Leave skip segments out of the iteration.
    Exclude,
}
27
/// An ordered list of read segments together with precomputed slicing metadata.
///
/// Instances are built through `ReadStructure::new`, which allows at most one segment
/// without a length (written `+` in the string form). With the `serde` feature enabled the
/// type is serialized and deserialized through its `String` representation.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "serde", serde(into = "String", try_from = "String"))]
pub struct ReadStructure {
    /// The segments, in read order.
    elements: Vec<ReadSegment>,
    /// Sum of the lengths of every segment that has a length.
    length_of_fixed_segments: usize,
    /// Index into `elements` of the segment without a length, if there is one.
    plus_index: Option<usize>,
    /// Combined length of the segments after `plus_index`; zero when `plus_index` is `None`.
    post_plus_len: usize,
    /// Start offset of each segment, parallel to `elements`. Values for segments up to and
    /// including `plus_index` are distances from the start of the read; values for segments
    /// after `plus_index` are stored negated and are distances back from the end of the read.
    offsets: Vec<isize>,
}
58
59impl ReadStructure {
60 #[allow(clippy::missing_panics_doc)]
70 pub fn new(segments: Vec<ReadSegment>) -> Result<Self, ReadStructureError> {
71 if segments.is_empty() {
72 return Err(ReadStructureError::ReadStructureContainsZeroElements);
73 }
74
75 let mut num_indefinite = 0;
76 let mut length_of_fixed_segments = 0;
77 let mut plus_index: Option<usize> = None;
78 for (i, s) in segments.iter().enumerate() {
79 if let Some(len) = s.length {
80 length_of_fixed_segments += len;
81 } else {
82 num_indefinite += 1;
83 if plus_index.is_none() {
84 plus_index = Some(i);
85 }
86 }
87 }
88
89 if num_indefinite > 1 {
90 return Err(ReadStructureError::ReadStructureMultipleIndefiniteLengthSegments(
91 *segments.iter().find(|s| !s.has_length()).unwrap(),
92 ));
93 }
94
95 let n = segments.len();
99 let mut offsets = vec![0isize; n];
100
101 let forward_end = plus_index.map_or(n, |p| p + 1);
103 let mut off: usize = 0;
104 for (i, seg) in segments.iter().take(forward_end).enumerate() {
105 offsets[i] = off as isize;
106 off += seg.length.unwrap_or(0);
107 }
108
109 let mut post_plus_len: usize = 0;
111 if let Some(p) = plus_index {
112 let mut dist_from_end: usize = 0;
115 for (i, seg) in segments.iter().enumerate().skip(p + 1).rev() {
116 let len = seg.length.expect("post-+ segments must be fixed length");
118 dist_from_end += len;
119 offsets[i] = -(dist_from_end as isize);
120 }
121 post_plus_len = dist_from_end;
122 }
123
124 Ok(ReadStructure {
125 elements: segments,
126 length_of_fixed_segments,
127 plus_index,
128 post_plus_len,
129 offsets,
130 })
131 }
132
133 pub fn extract<'rs, 'b>(
154 &'rs self,
155 bases: &'b [u8],
156 quals: &'b [u8],
157 skip_handling: SkipHandling,
158 ) -> Result<ExtractedSegments<'rs, 'b>, ReadStructureError> {
159 if bases.len() != quals.len() {
160 return Err(ReadStructureError::MismatchingBasesAndQualsLen {
161 bases_len: bases.len(),
162 quals_len: quals.len(),
163 });
164 }
165
166 let required = if self.plus_index.is_some() {
167 self.length_of_fixed_segments + 1
168 } else {
169 self.length_of_fixed_segments
170 };
171 if bases.len() < required {
172 return Err(ReadStructureError::ReadTooShort { read_len: bases.len(), required });
173 }
174 if self.plus_index.is_none() && bases.len() > self.length_of_fixed_segments {
175 return Err(ReadStructureError::ReadTooLong {
176 read_len: bases.len(),
177 expected: self.length_of_fixed_segments,
178 });
179 }
180
181 Ok(ExtractedSegments {
182 elements: &self.elements,
183 offsets: &self.offsets,
184 plus_index: self.plus_index,
185 post_plus_len: self.post_plus_len,
186 bases,
187 quals,
188 skip_handling,
189 next_index: 0,
190 })
191 }
192
193 pub fn has_fixed_length(&self) -> bool {
196 self.plus_index.is_none()
197 }
198
199 pub fn fixed_length(&self) -> Option<usize> {
201 if self.has_fixed_length() { Some(self.length_of_fixed_segments) } else { None }
202 }
203
204 pub fn number_of_segments(&self) -> usize {
206 self.elements.len()
207 }
208
209 pub fn segments(&self) -> &[ReadSegment] {
211 &self.elements
212 }
213
214 pub fn iter(&self) -> impl Iterator<Item = &ReadSegment> {
216 self.elements.iter()
217 }
218
219 pub fn segments_by_type(&self, kind: SegmentType) -> impl Iterator<Item = &ReadSegment> {
221 self.elements.iter().filter(move |seg| seg.kind == kind)
222 }
223
224 pub fn templates(&self) -> impl Iterator<Item = &ReadSegment> {
226 self.segments_by_type(SegmentType::Template)
227 }
228
229 pub fn sample_barcodes(&self) -> impl Iterator<Item = &ReadSegment> {
231 self.segments_by_type(SegmentType::SampleBarcode)
232 }
233
234 pub fn molecular_barcodes(&self) -> impl Iterator<Item = &ReadSegment> {
236 self.segments_by_type(SegmentType::MolecularBarcode)
237 }
238
239 pub fn skips(&self) -> impl Iterator<Item = &ReadSegment> {
241 self.segments_by_type(SegmentType::Skip)
242 }
243
244 pub fn cellular_barcodes(&self) -> impl Iterator<Item = &ReadSegment> {
246 self.segments_by_type(SegmentType::CellularBarcode)
247 }
248
249 pub fn first(&self) -> Option<&ReadSegment> {
251 self.elements.first()
252 }
253
254 pub fn last(&self) -> Option<&ReadSegment> {
256 self.elements.last()
257 }
258}
259
/// Iterator that yields each segment of a read structure together with the matching
/// slices of a read's bases and qualities.
///
/// Created by `ReadStructure::extract`, which validates the read length before
/// constructing it.
#[derive(Debug, Clone)]
pub struct ExtractedSegments<'rs, 'b> {
    /// Segments of the originating read structure, in read order.
    elements: &'rs [ReadSegment],
    /// Start offsets parallel to `elements`; negative values count back from the read end.
    offsets: &'rs [isize],
    /// Index of the segment without a length, if there is one.
    plus_index: Option<usize>,
    /// Combined length of the segments after `plus_index`.
    post_plus_len: usize,
    /// Bases of the read being split.
    bases: &'b [u8],
    /// Qualities of the read being split; same length as `bases`.
    quals: &'b [u8],
    /// Whether skip segments are yielded or passed over.
    skip_handling: SkipHandling,
    /// Index of the next segment to examine.
    next_index: usize,
}
277
278impl<'rs, 'b> Iterator for ExtractedSegments<'rs, 'b> {
279 type Item = (&'rs ReadSegment, &'b [u8], &'b [u8]);
280
281 fn next(&mut self) -> Option<Self::Item> {
282 while self.next_index < self.elements.len() {
283 let i = self.next_index;
284 self.next_index += 1;
285 let seg = &self.elements[i];
286 if self.skip_handling == SkipHandling::Exclude && seg.kind == SegmentType::Skip {
287 continue;
288 }
289 let (start, end) = if Some(i) == self.plus_index {
290 (self.offsets[i] as usize, self.bases.len() - self.post_plus_len)
293 } else {
294 let off = self.offsets[i];
295 let start =
296 if off >= 0 { off as usize } else { self.bases.len() - ((-off) as usize) };
297 let len = seg.length.expect("non-`+` segment must have a length");
299 (start, start + len)
300 };
301 return Some((seg, &self.bases[start..end], &self.quals[start..end]));
302 }
303 None
304 }
305}
306
// `next` only ever advances `next_index` while it is below `elements.len()`; once the end
// is reached every further call returns `None`, which is what `FusedIterator` promises.
impl FusedIterator for ExtractedSegments<'_, '_> {}
308
309impl IntoIterator for ReadStructure {
310 type Item = ReadSegment;
311
312 type IntoIter = std::vec::IntoIter<Self::Item>;
313
314 fn into_iter(self) -> Self::IntoIter {
315 self.elements.into_iter()
316 }
317}
318
319impl Index<usize> for ReadStructure {
320 type Output = ReadSegment;
321
322 fn index(&self, idx: usize) -> &Self::Output {
324 &self.elements[idx]
325 }
326}
327
328impl std::fmt::Display for ReadStructure {
329 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
331 for e in &self.elements {
332 write!(f, "{}", e)?;
333 }
334 Ok(())
335 }
336}
337
338impl std::str::FromStr for ReadStructure {
339 type Err = ReadStructureError;
340
341 fn from_str(rs: &str) -> Result<Self, Self::Err> {
343 let mut i = 0;
344 let mut segs: Vec<ReadSegment> = Vec::new();
345 let chars: Vec<char> = rs.to_uppercase().chars().filter(|c| !c.is_whitespace()).collect();
346 while i < chars.len() {
347 let parse_i = i;
349
350 let length = if chars[i] as u8 == ANY_LENGTH_BYTE {
352 i += 1;
353 None
354 } else if chars[i].is_ascii_digit() {
355 let mut len: usize = 0;
356 while i < chars.len() && chars[i].is_ascii_digit() {
357 let digit = chars[i].to_digit(10).unwrap() as usize;
359 len = (len * 10) + digit;
360 i += 1;
361 }
362 Some(len)
363 } else {
364 return Err(ReadStructureError::ReadStructureMissingLengthInformation(
365 ErrorMessageParts::new(&chars, parse_i, parse_i + 1),
366 ));
367 };
368
369 if chars.len() == i {
371 return Err(ReadStructureError::ReadStructureMissingOperator(
372 ErrorMessageParts::new(&chars, parse_i, i),
373 ));
374 } else if let Ok(kind) = SegmentType::try_from(chars[i]) {
375 if length == Some(0) {
376 return Err(ReadStructureError::ReadSegmentLengthZero(ErrorMessageParts::new(
377 &chars, parse_i, i,
378 )));
379 }
380 i += 1;
381 segs.push(ReadSegment { length, kind });
382 } else {
383 return Err(ReadStructureError::ReadStructureHadUnknownType(
384 ErrorMessageParts::new(&chars, parse_i, i + 1),
385 ));
386 }
387 }
388
389 ReadStructure::new(segs)
390 }
391}
392
393impl TryFrom<&[ReadSegment]> for ReadStructure {
394 type Error = ReadStructureError;
395 fn try_from(elements: &[ReadSegment]) -> Result<Self, Self::Error> {
397 Self::new(elements.to_vec())
398 }
399}
400
401impl TryFrom<String> for ReadStructure {
402 type Error = ReadStructureError;
403 fn try_from(s: String) -> Result<Self, Self::Error> {
405 s.parse()
406 }
407}
408
409impl From<ReadStructure> for String {
410 fn from(rs: ReadStructure) -> Self {
412 rs.to_string()
413 }
414}
415
#[cfg(test)]
mod test {
    use crate::ReadStructureError;
    use crate::read_structure::{ReadStructure, SkipHandling};
    use crate::segment_type::SegmentType;
    use std::str::FromStr;

    /// Parses `spec`, panicking at the call site when it is not a valid read structure.
    #[track_caller]
    fn parse(spec: &str) -> ReadStructure {
        ReadStructure::from_str(spec).unwrap()
    }

    // ---- Parsing and formatting -------------------------------------------------------

    /// Valid structures round-trip through parsing and formatting unchanged.
    #[test]
    fn test_read_structure_from_str() {
        let specs =
            ["1T", "1B", "1M", "1S", "101T", "5B101T", "123456789T", "10T10B10B10S10M", "5B2C3T"];
        for spec in specs {
            assert_eq!(parse(spec).to_string(), spec);
        }
    }

    /// Whitespace in the input is ignored by the parser.
    #[test]
    fn test_read_structure_from_str_with_whitespace() {
        for spec in ["75T 8B 8B 75T", " 75T 8B 8B\t75T "] {
            assert_eq!(parse(spec).to_string(), "75T8B8B75T");
        }
    }

    /// A single `+` segment is accepted wherever it appears.
    #[test]
    fn test_read_structure_accepts_plus_at_any_position_once() {
        for spec in ["5M+T", "+M"] {
            assert_eq!(parse(spec).to_string(), spec);
        }
    }

    /// Generates one test per entry asserting that the input fails to parse.
    macro_rules! test_read_structure_from_str_err {
        ($($name:ident: $value:expr_2021,)*) => {
            $(
                #[test]
                fn $name() {
                    let outcome = ReadStructure::from_str($value);
                    assert!(outcome.is_err());
                }
            )*
        }
    }

    test_read_structure_from_str_err! {
        test_read_structure_rejects_multiple_plus_0: "++M",
        test_read_structure_rejects_multiple_plus_1: "5M++T",
        test_read_structure_rejects_multiple_plus_2: "5M70+T",
        test_read_structure_rejects_multiple_plus_3: "+M+T",
        test_read_structure_rejects_multiple_plus_4: "5M+T+B",
    }

    /// Generates one test per `(input, suffix)` entry asserting that parsing fails with a
    /// message ending in `suffix`.
    macro_rules! test_read_structure_from_str_invalid {
        ($($name:ident: $value:expr_2021,)*) => {
            $(
                #[test]
                fn $name() {
                    let (input, expected) = $value;
                    let message = ReadStructure::from_str(input).unwrap_err().to_string();
                    assert!(message.ends_with(expected));
                }
            )*
        }
    }

    test_read_structure_from_str_invalid! {
        test_read_structure_from_str_invalid_0: ("9R", "[9R]"),
        test_read_structure_from_str_invalid_1: ("T", "[T]"),
        test_read_structure_from_str_invalid_2: ("23TT", "23T[T]"),
        test_read_structure_from_str_invalid_3: ("23T2", "23T[2]"),
        test_read_structure_from_str_invalid_4: ("23T2TT23T", "23T2T[T]23T"),
    }

    // ---- Accessors --------------------------------------------------------------------

    /// Each per-kind accessor yields only the segments of that kind, in order.
    #[test]
    fn test_read_structure_collect_segments() {
        let structure = parse("10M9T8B7S3C10M9T8B7S2C");

        let joined: String = structure.templates().map(|seg| seg.to_string()).collect();
        assert_eq!(joined, "9T9T");

        let joined: String = structure.sample_barcodes().map(|seg| seg.to_string()).collect();
        assert_eq!(joined, "8B8B");

        let joined: String = structure.molecular_barcodes().map(|seg| seg.to_string()).collect();
        assert_eq!(joined, "10M10M");

        let joined: String = structure.skips().map(|seg| seg.to_string()).collect();
        assert_eq!(joined, "7S7S");

        let joined: String = structure.cellular_barcodes().map(|seg| seg.to_string()).collect();
        assert_eq!(joined, "3C2C");
    }

    /// Generates one test per `(input, count)` entry checking the number of segments.
    macro_rules! test_read_structure_length {
        ($($name:ident: $value:expr_2021,)*) => {
            $(
                #[test]
                fn $name() {
                    let (input, expected) = $value;
                    let structure = ReadStructure::from_str(input).unwrap();
                    assert_eq!(structure.number_of_segments(), expected);
                }
            )*
        }
    }

    test_read_structure_length! {
        test_read_structure_length_0: ("1T", 1),
        test_read_structure_length_1: ("1B", 1),
        test_read_structure_length_2: ("1M", 1),
        test_read_structure_length_3: ("1S", 1),
        test_read_structure_length_4: ("101T", 1),
        test_read_structure_length_5: ("5B101T", 2),
        test_read_structure_length_6: ("123456789T", 1),
        test_read_structure_length_7: ("10T10B10B10S10M", 5),
    }

    /// Generates one test per `(input, index, segment)` entry checking indexed access.
    macro_rules! test_read_structure_index {
        ($($name:ident: $value:expr_2021,)*) => {
            $(
                #[test]
                fn $name() {
                    let (string, index, exp_string) = $value;
                    let structure = ReadStructure::from_str(string).unwrap();
                    let segment = &structure[index];
                    assert_eq!(segment.to_string(), exp_string);
                }
            )*
        }
    }

    test_read_structure_index! {
        test_read_structure_index_0: ("1T", 0, "1T"),
        test_read_structure_index_1: ("1B", 0, "1B"),
        test_read_structure_index_2: ("1M", 0, "1M"),
        test_read_structure_index_3: ("1S", 0, "1S"),
        test_read_structure_index_4: ("101T", 0, "101T"),
        test_read_structure_index_5: ("5B101T", 0, "5B"),
        test_read_structure_index_6: ("5B101T", 1, "101T"),
        test_read_structure_index_7: ("123456789T", 0, "123456789T"),
        test_read_structure_index_8: ("10T10B10B10S10M", 0, "10T"),
        test_read_structure_index_9: ("10T10B10B10S10M", 1, "10B"),
        test_read_structure_index_10: ("10T10B10B10S10M", 2, "10B"),
        test_read_structure_index_11: ("10T10B10B10S10M", 3, "10S"),
        test_read_structure_index_12: ("10T10B10B10S10M", 4, "10M"),
        test_read_structure_index_32: ("10T10B10B10S10C10M", 4, "10C"),
    }

    // ---- Serde ------------------------------------------------------------------------

    /// A fixed-length structure survives a JSON round trip.
    #[test]
    #[cfg(feature = "serde")]
    fn test_serde() {
        let original = parse("10T10B10B10S10M");
        let json = serde_json::to_string(&original).unwrap();
        let restored: ReadStructure = serde_json::from_str(&json).unwrap();
        assert_eq!(original, restored);
    }

    /// A structure with a `+` in the middle survives a JSON round trip.
    #[test]
    #[cfg(feature = "serde")]
    fn test_serde_middle_plus_round_trip() {
        let original = parse("8B+M10T");
        let json = serde_json::to_string(&original).unwrap();
        let restored: ReadStructure = serde_json::from_str(&json).unwrap();
        assert_eq!(original, restored);
    }

    /// The JSON form is the plain string representation.
    #[test]
    #[cfg(feature = "serde")]
    fn test_serde_wire_format_is_canonical_string() {
        let json = serde_json::to_string(&parse("8B+M10T")).unwrap();
        assert_eq!(json, "\"8B+M10T\"");
    }

    /// Deserializing an invalid string fails with a non-empty message.
    #[test]
    #[cfg(feature = "serde")]
    fn test_serde_rejects_invalid_string() {
        let failure = serde_json::from_str::<ReadStructure>("\"not a read structure\"").unwrap_err();
        assert!(!failure.to_string().is_empty());
    }

    // ---- Placement of the `+` segment -------------------------------------------------

    #[test]
    fn test_accepts_middle_plus() {
        let structure = parse("8B+M10T");
        assert_eq!(structure.to_string(), "8B+M10T");
        assert_eq!(structure.number_of_segments(), 3);
    }

    #[test]
    fn test_accepts_leading_plus() {
        let structure = parse("+B10T");
        assert_eq!(structure.to_string(), "+B10T");
        assert_eq!(structure.number_of_segments(), 2);
    }

    #[test]
    fn test_accepts_middle_plus_between_fixed_runs() {
        let structure = parse("10T8B+M10T");
        assert_eq!(structure.to_string(), "10T8B+M10T");
        assert_eq!(structure.number_of_segments(), 4);
    }

    #[test]
    fn test_has_fixed_length_strict() {
        assert!(parse("10T8B").has_fixed_length());
        assert!(!parse("10T+M").has_fixed_length());
    }

    #[test]
    fn test_has_fixed_length_middle_plus() {
        assert!(!parse("8B+M10T").has_fixed_length());
    }

    #[test]
    fn test_fixed_length_none_for_middle_plus() {
        assert!(parse("8B+M10T").fixed_length().is_none());
    }

    // ---- Extraction -------------------------------------------------------------------

    #[test]
    fn test_extract_fixed_length() {
        let structure = parse("10T8B");
        let bases = b"AAAAAAAAAAGGGGGGGG";
        let quals = b"IIIIIIIIIIJJJJJJJJ";
        let pieces: Vec<_> =
            structure.extract(bases, quals, SkipHandling::Include).unwrap().collect();
        assert_eq!(pieces.len(), 2);

        let (segment, seg_bases, seg_quals) = pieces[0];
        assert_eq!(segment.kind, SegmentType::Template);
        assert_eq!(seg_bases, b"AAAAAAAAAA");
        assert_eq!(seg_quals, b"IIIIIIIIII");

        let (segment, seg_bases, seg_quals) = pieces[1];
        assert_eq!(segment.kind, SegmentType::SampleBarcode);
        assert_eq!(seg_bases, b"GGGGGGGG");
        assert_eq!(seg_quals, b"JJJJJJJJ");
    }

    #[test]
    fn test_extract_trailing_plus() {
        let structure = parse("10T+M");
        let bases = b"AAAAAAAAAAGGGGGGGGGG";
        let quals = b"IIIIIIIIIIJJJJJJJJJJ";
        let pieces: Vec<_> =
            structure.extract(bases, quals, SkipHandling::Include).unwrap().collect();
        assert_eq!(pieces.len(), 2);
        assert_eq!(pieces[0].1, b"AAAAAAAAAA");
        assert_eq!(pieces[1].1, b"GGGGGGGGGG");
        assert_eq!(pieces[1].2, b"JJJJJJJJJJ");
    }

    #[test]
    fn test_extract_leading_plus() {
        let structure = parse("+B10T");
        let bases = b"BBBBBTTTTTTTTTT";
        let quals = b"!!!!!##########";
        let pieces: Vec<_> =
            structure.extract(bases, quals, SkipHandling::Include).unwrap().collect();
        assert_eq!(pieces.len(), 2);

        let (segment, seg_bases, seg_quals) = pieces[0];
        assert_eq!(segment.kind, SegmentType::SampleBarcode);
        assert_eq!(seg_bases, b"BBBBB");
        assert_eq!(seg_quals, b"!!!!!");

        let (segment, seg_bases, seg_quals) = pieces[1];
        assert_eq!(segment.kind, SegmentType::Template);
        assert_eq!(seg_bases, b"TTTTTTTTTT");
        assert_eq!(seg_quals, b"##########");
    }

    #[test]
    fn test_extract_middle_plus() {
        let structure = parse("8B+M10T");
        let bases = b"BBBBBBBBUUUUUUUUUUUUTTTTTTTTTT";
        let quals = b"!!!!!!!!@@@@@@@@@@@@##########";
        assert_eq!(bases.len(), 30);
        let pieces: Vec<_> =
            structure.extract(bases, quals, SkipHandling::Include).unwrap().collect();
        assert_eq!(pieces.len(), 3);
        assert_eq!(pieces[0].1, b"BBBBBBBB");
        assert_eq!(pieces[0].2, b"!!!!!!!!");
        assert_eq!(pieces[1].0.kind, SegmentType::MolecularBarcode);
        assert_eq!(pieces[1].1, b"UUUUUUUUUUUU");
        assert_eq!(pieces[1].2, b"@@@@@@@@@@@@");
        assert_eq!(pieces[2].1, b"TTTTTTTTTT");
        assert_eq!(pieces[2].2, b"##########");
    }

    #[test]
    fn test_extract_multiple_pre_plus_and_post_plus() {
        let structure = parse("10T8B+M10T");
        let bases = b"TTTTTTTTTTBBBBBBBBUUUUUUUUUUUUTTTTTTTTTT";
        let quals = b"IIIIIIIIII!!!!!!!!@@@@@@@@@@@@##########";
        assert_eq!(bases.len(), 40);
        let pieces: Vec<_> =
            structure.extract(bases, quals, SkipHandling::Include).unwrap().collect();
        assert_eq!(pieces.len(), 4);
        assert_eq!(pieces[0].1, b"TTTTTTTTTT");
        assert_eq!(pieces[1].1, b"BBBBBBBB");
        assert_eq!(pieces[2].1, b"UUUUUUUUUUUU");
        assert_eq!(pieces[3].1, b"TTTTTTTTTT");
    }

    /// With `SkipHandling::Exclude` the skip segment is not yielded.
    #[test]
    fn test_extract_include_skips_false_drops_skip() {
        let structure = parse("8S+M10T");
        let bases = b"SSSSSSSSUUUUUUUUUUUUTTTTTTTTTT";
        let quals = b"????????@@@@@@@@@@@@##########";
        let pieces: Vec<_> =
            structure.extract(bases, quals, SkipHandling::Exclude).unwrap().collect();
        assert_eq!(pieces.len(), 2);
        assert_eq!(pieces[0].0.kind, SegmentType::MolecularBarcode);
        assert_eq!(pieces[1].0.kind, SegmentType::Template);
    }

    /// With `SkipHandling::Include` the skip segment is yielded like any other.
    #[test]
    fn test_extract_include_skips_true_keeps_skip() {
        let structure = parse("8S+M10T");
        let bases = b"SSSSSSSSUUUUUUUUUUUUTTTTTTTTTT";
        let quals = b"????????@@@@@@@@@@@@##########";
        let pieces: Vec<_> =
            structure.extract(bases, quals, SkipHandling::Include).unwrap().collect();
        assert_eq!(pieces.len(), 3);
        assert_eq!(pieces[0].0.kind, SegmentType::Skip);
        assert_eq!(pieces[0].1, b"SSSSSSSS");
    }

    #[test]
    fn test_extract_errors_on_bases_quals_length_mismatch() {
        let structure = parse("10T");
        let failure = structure.extract(b"AAAAAAAAAA", b"III", SkipHandling::Include).unwrap_err();
        assert!(matches!(failure, ReadStructureError::MismatchingBasesAndQualsLen { .. }));
    }

    #[test]
    fn test_extract_errors_when_read_too_short_for_fixed() {
        let structure = parse("10T8B");
        let failure = structure.extract(b"AAAA", b"IIII", SkipHandling::Include).unwrap_err();
        let ReadStructureError::ReadTooShort { read_len, required } = &failure else {
            panic!("expected ReadTooShort, got {:?}", failure);
        };
        assert_eq!(*read_len, 4);
        assert_eq!(*required, 18);
    }

    #[test]
    fn test_extract_errors_when_read_too_long_for_fixed() {
        let structure = parse("10T8B");
        let bases = vec![b'X'; 20];
        let quals = vec![b'#'; 20];
        let failure = structure.extract(&bases, &quals, SkipHandling::Include).unwrap_err();
        let ReadStructureError::ReadTooLong { read_len, expected } = &failure else {
            panic!("expected ReadTooLong, got {:?}", failure);
        };
        assert_eq!(*read_len, 20);
        assert_eq!(*expected, 18);
    }

    /// Extra bases beyond the fixed total are absorbed by the `+` segment.
    #[test]
    fn test_extract_allows_extra_bases_when_plus_present() {
        let structure = parse("8B+M10T");
        let bases = b"BBBBBBBBUUUUUUUUUUUUUUUUUUUUUUUUTTTTTTTTTT";
        let quals = b"!!!!!!!!@@@@@@@@@@@@@@@@@@@@@@@@##########";
        assert_eq!(bases.len(), 42);
        let pieces: Vec<_> =
            structure.extract(bases, quals, SkipHandling::Include).unwrap().collect();
        assert_eq!(pieces.len(), 3);
        assert_eq!(pieces[1].1.len(), 24);
    }

    /// A read that leaves no base for the `+` segment is rejected as too short.
    #[test]
    fn test_extract_errors_when_read_exactly_fixed_len_but_plus_present() {
        let structure = parse("8B+M10T");
        let bases = vec![b'X'; 18];
        let quals = vec![b'#'; 18];
        let failure = structure.extract(&bases, &quals, SkipHandling::Include).unwrap_err();
        let ReadStructureError::ReadTooShort { read_len, required } = &failure else {
            panic!("expected ReadTooShort, got {:?}", failure);
        };
        assert_eq!(*read_len, 18);
        assert_eq!(*required, 19);
    }

    #[test]
    fn test_extract_allows_read_exactly_fixed_len_when_no_plus() {
        let structure = parse("10T8B");
        let bases = vec![b'X'; 18];
        let quals = vec![b'#'; 18];
        let pieces: Vec<_> =
            structure.extract(&bases, &quals, SkipHandling::Include).unwrap().collect();
        assert_eq!(pieces.len(), 2);
    }

    /// A lone `+` segment receives the whole read.
    #[test]
    fn test_extract_plus_only_structure() {
        let structure = parse("+T");
        let bases = b"AAAAAAAAAA";
        let quals = b"IIIIIIIIII";
        let pieces: Vec<_> =
            structure.extract(bases, quals, SkipHandling::Include).unwrap().collect();
        assert_eq!(pieces.len(), 1);
        let (segment, seg_bases, seg_quals) = pieces[0];
        assert_eq!(segment.kind, SegmentType::Template);
        assert_eq!(seg_bases, bases);
        assert_eq!(seg_quals, quals);
    }

    /// At the minimum accepted read length the `+` segment gets exactly one base.
    #[test]
    fn test_extract_plus_yields_one_base_at_minimum_length() {
        let structure = parse("8B+M10T");
        let bases = b"BBBBBBBBMTTTTTTTTTT";
        let quals = b"!!!!!!!!@##########";
        assert_eq!(bases.len(), 19);
        let pieces: Vec<_> =
            structure.extract(bases, quals, SkipHandling::Include).unwrap().collect();
        assert_eq!(pieces.len(), 3);
        assert_eq!(pieces[0].1, b"BBBBBBBB");
        assert_eq!(pieces[1].0.kind, SegmentType::MolecularBarcode);
        assert_eq!(pieces[1].1, b"M");
        assert_eq!(pieces[1].2, b"@");
        assert_eq!(pieces[2].1, b"TTTTTTTTTT");
    }

    #[test]
    fn test_extract_multiple_post_plus_segments() {
        let structure = parse("8B+M5T5S");
        let bases = b"BBBBBBBBUUUUUUUUUUUUTTTTTSSSSS";
        let quals = b"!!!!!!!!@@@@@@@@@@@@#####?????";
        assert_eq!(bases.len(), 30);
        let pieces: Vec<_> =
            structure.extract(bases, quals, SkipHandling::Include).unwrap().collect();
        assert_eq!(pieces.len(), 4);
        assert_eq!(pieces[0].1, b"BBBBBBBB");
        assert_eq!(pieces[1].0.kind, SegmentType::MolecularBarcode);
        assert_eq!(pieces[1].1, b"UUUUUUUUUUUU");
        assert_eq!(pieces[2].0.kind, SegmentType::Template);
        assert_eq!(pieces[2].1, b"TTTTT");
        assert_eq!(pieces[3].0.kind, SegmentType::Skip);
        assert_eq!(pieces[3].1, b"SSSSS");
    }
}