1use crate::sam::{record_utils, to_smallest_signed_int};
39use crate::unified_pipeline::MemoryEstimate;
40use anyhow::{Result, anyhow, bail};
41use noodles::sam::alignment::record::Cigar;
42use noodles::sam::alignment::record::data::field::Tag;
43use noodles::sam::alignment::record_buf::RecordBuf;
44use noodles::sam::alignment::record_buf::data::field::Value as BufValue;
45use std::fmt::Write as _;
46
47pub use crate::sam::PairOrientation;
49
50pub use fgumi_umi::MoleculeId;
52
53#[derive(Debug, Clone)]
63pub struct Template {
64 pub name: Vec<u8>,
66 pub records: Vec<RecordBuf>,
68 pub raw_records: Option<Vec<Vec<u8>>>,
70 pub r1: Option<(usize, usize)>,
72 pub r2: Option<(usize, usize)>,
74 pub r1_supplementals: Option<(usize, usize)>,
76 pub r2_supplementals: Option<(usize, usize)>,
78 pub r1_secondaries: Option<(usize, usize)>,
80 pub r2_secondaries: Option<(usize, usize)>,
82 pub mi: MoleculeId,
84}
85
86pub type TemplateBatch = Vec<Template>;
88
89#[derive(Debug, Default)]
105pub struct Builder {
106 name: Option<Vec<u8>>,
108 r1: Option<RecordBuf>,
110 r2: Option<RecordBuf>,
112 r1_supplementals: Vec<RecordBuf>,
114 r2_supplementals: Vec<RecordBuf>,
116 r1_secondaries: Vec<RecordBuf>,
118 r2_secondaries: Vec<RecordBuf>,
120}
121
122impl Builder {
123 fn new() -> Self {
129 Builder {
130 name: None,
131 r1: None,
132 r2: None,
133 r1_supplementals: Vec::new(),
134 r2_supplementals: Vec::new(),
135 r1_secondaries: Vec::new(),
136 r2_secondaries: Vec::new(),
137 }
138 }
139
140 pub fn push(&mut self, record: RecordBuf) -> Result<&mut Builder> {
154 let record_name_bytes: &[u8] = record.name().map_or(&[], <_ as AsRef<[u8]>>::as_ref);
156
157 if let Some(cur_name) = &self.name {
158 if cur_name.as_slice() != record_name_bytes {
160 bail!(
161 "Template name mismatch: found '{record_name_bytes:?}', expected '{cur_name:?}'"
162 );
163 }
164 } else {
165 self.name = Some(record_name_bytes.to_vec());
167 }
168
169 let flags = record.flags();
170 let is_r1 = !flags.is_segmented() || flags.is_first_segment();
171
172 if is_r1 {
173 if flags.is_secondary() {
174 self.r1_secondaries.push(record);
175 } else if flags.is_supplementary() {
176 self.r1_supplementals.push(record);
177 } else if self.r1.is_some() {
178 return Err(anyhow!(
179 "Multiple non-secondary, non-supplemental R1 records for read '{:?}'",
180 self.name.as_ref().unwrap()
181 ));
182 } else {
183 self.r1 = Some(record);
184 }
185 } else if flags.is_secondary() {
186 self.r2_secondaries.push(record);
187 } else if flags.is_supplementary() {
188 self.r2_supplementals.push(record);
189 } else if self.r2.is_some() {
190 return Err(anyhow!(
191 "Multiple non-secondary, non-supplemental R2 records for read '{:?}'",
192 self.name.as_ref().unwrap()
193 ));
194 } else {
195 self.r2 = Some(record);
196 }
197
198 Ok(self)
199 }
200
201 #[must_use]
207 pub fn len(&self) -> usize {
208 let mut count = 0;
209 count += usize::from(self.r1.is_some());
210 count += usize::from(self.r2.is_some());
211 count += self.r1_supplementals.len();
212 count += self.r2_supplementals.len();
213 count += self.r1_secondaries.len();
214 count += self.r2_secondaries.len();
215 count
216 }
217
218 #[must_use]
224 pub fn is_empty(&self) -> bool {
225 self.len() == 0
226 }
227
228 pub fn build(&mut self) -> Result<Template> {
236 let r1_end: usize = usize::from(self.r1.is_some());
237 let r2_end: usize = r1_end + usize::from(self.r2.is_some());
238 let r1_supplementals_end: usize = r2_end + self.r1_supplementals.len();
239 let r2_supplementals_end: usize = r1_supplementals_end + self.r2_supplementals.len();
240 let r1_secondaries_end: usize = r2_supplementals_end + self.r1_secondaries.len();
241 let r2_secondaries_end: usize = r1_secondaries_end + self.r2_secondaries.len();
242
243 let r1: Option<(usize, usize)> = if self.r1.is_some() { Some((0, r1_end)) } else { None };
244 let r2: Option<(usize, usize)> =
245 if self.r2.is_some() { Some((r1_end, r2_end)) } else { None };
246 let r1_supplementals: Option<(usize, usize)> = if self.r1_supplementals.is_empty() {
247 None
248 } else {
249 Some((r2_end, r1_supplementals_end))
250 };
251 let r2_supplementals: Option<(usize, usize)> = if self.r2_supplementals.is_empty() {
252 None
253 } else {
254 Some((r1_supplementals_end, r2_supplementals_end))
255 };
256 let r1_secondaries: Option<(usize, usize)> = if self.r1_secondaries.is_empty() {
257 None
258 } else {
259 Some((r2_supplementals_end, r1_secondaries_end))
260 };
261 let r2_secondaries: Option<(usize, usize)> = if self.r2_secondaries.is_empty() {
262 None
263 } else {
264 Some((r1_secondaries_end, r2_secondaries_end))
265 };
266
267 let Some(name) = self.name.take() else {
268 return Err(anyhow!("No records given to template builder"));
269 };
270
271 let mut records: Vec<RecordBuf> = Vec::with_capacity(self.len());
273 if let Some(rec) = self.r1.take() {
274 records.push(rec);
275 }
276 if let Some(rec) = self.r2.take() {
277 records.push(rec);
278 }
279 self.r1_supplementals.reverse();
282 self.r2_supplementals.reverse();
283 self.r1_secondaries.reverse();
284 self.r2_secondaries.reverse();
285 records.append(&mut self.r1_supplementals);
286 records.append(&mut self.r2_supplementals);
287 records.append(&mut self.r1_secondaries);
288 records.append(&mut self.r2_secondaries);
289
290 Ok(Template {
291 name,
292 records,
293 raw_records: None,
294 r1,
295 r2,
296 r1_supplementals,
297 r2_supplementals,
298 r1_secondaries,
299 r2_secondaries,
300 mi: MoleculeId::None,
301 })
302 }
303}
304
305impl Template {
306 #[must_use]
316 pub fn r1(&self) -> Option<&RecordBuf> {
317 if self.records.is_empty() {
318 return None;
319 }
320 self.r1.map(|_| &self.records[0])
321 }
322
323 #[must_use]
333 pub fn r2(&self) -> Option<&RecordBuf> {
334 if self.records.is_empty() {
335 return None;
336 }
337 self.r2.map(|(i, _)| &self.records[i])
338 }
339
340 fn records_in_range(&self, range: Option<(usize, usize)>) -> &[RecordBuf] {
342 if let Some((start, end)) = range {
343 if start <= end && end <= self.records.len() {
344 return &self.records[start..end];
345 }
346 }
347 &[]
348 }
349
350 #[must_use]
353 pub fn r1_supplementals(&self) -> &[RecordBuf] {
354 self.records_in_range(self.r1_supplementals)
355 }
356
357 #[must_use]
360 pub fn r2_supplementals(&self) -> &[RecordBuf] {
361 self.records_in_range(self.r2_supplementals)
362 }
363
364 #[must_use]
367 pub fn r1_secondaries(&self) -> &[RecordBuf] {
368 self.records_in_range(self.r1_secondaries)
369 }
370
371 #[must_use]
374 pub fn r2_secondaries(&self) -> &[RecordBuf] {
375 self.records_in_range(self.r2_secondaries)
376 }
377
378 #[must_use]
388 pub fn new(name: Vec<u8>) -> Self {
389 Template {
390 name,
391 records: Vec::new(),
392 raw_records: None,
393 r1: None,
394 r2: None,
395 r1_supplementals: None,
396 r2_supplementals: None,
397 r1_secondaries: None,
398 r2_secondaries: None,
399 mi: MoleculeId::None,
400 }
401 }
402
403 pub fn from_records<I>(records: I) -> Result<Self>
427 where
428 I: IntoIterator<Item = RecordBuf>,
429 {
430 let mut builder = Builder::new();
431
432 for record in records {
433 builder.push(record)?;
434 }
435
436 builder.build()
437 }
438
439 pub fn primary_reads(&self) -> impl Iterator<Item = &RecordBuf> {
447 let records = &self.records;
448 self.r1.iter().chain(self.r2.iter()).filter_map(move |(start, _)| records.get(*start))
449 }
450
451 #[must_use]
471 pub fn into_primary_reads(mut self) -> (Option<RecordBuf>, Option<RecordBuf>) {
472 if self.records.is_empty() {
473 return (None, None);
474 }
475 let r2 = self.r2.and_then(|(i, _)| self.records.get_mut(i).map(std::mem::take));
477 let r1 = self.r1.and_then(|_| self.records.get_mut(0).map(std::mem::take));
478 (r1, r2)
479 }
480
481 pub fn all_supplementary_and_secondary(&self) -> impl Iterator<Item = &RecordBuf> {
487 let records = &self.records;
488 let iter = self
489 .r1_supplementals
490 .iter()
491 .chain(self.r2_supplementals.iter())
492 .chain(self.r1_secondaries.iter())
493 .chain(self.r2_secondaries.iter());
494 iter.flat_map(move |(start, end)| {
495 let s = (*start).min(records.len());
496 let e = (*end).min(records.len());
497 records[s..e].iter()
498 })
499 }
500
501 pub fn all_reads(&self) -> impl Iterator<Item = &RecordBuf> {
509 self.primary_reads().chain(self.all_supplementary_and_secondary())
510 }
511
512 #[must_use]
517 pub fn into_records(self) -> Vec<RecordBuf> {
518 self.records
519 }
520
521 pub fn all_r1s(&self) -> impl Iterator<Item = &RecordBuf> {
527 let records = &self.records;
528 let iter =
529 self.r1.iter().chain(self.r1_secondaries.iter()).chain(self.r1_supplementals.iter());
530 iter.flat_map(move |(start, end)| {
531 let s = (*start).min(records.len());
532 let e = (*end).min(records.len());
533 records[s..e].iter()
534 })
535 }
536
537 pub fn all_r2s(&self) -> impl Iterator<Item = &RecordBuf> {
543 let records = &self.records;
544 let iter =
545 self.r2.iter().chain(self.r2_secondaries.iter()).chain(self.r2_supplementals.iter());
546 iter.flat_map(move |(start, end)| {
547 let s = (*start).min(records.len());
548 let e = (*end).min(records.len());
549 records[s..e].iter()
550 })
551 }
552
553 #[must_use]
559 pub fn read_count(&self) -> usize {
560 if let Some(ref rr) = self.raw_records { rr.len() } else { self.records.len() }
561 }
562
563 #[inline]
565 #[must_use]
566 pub fn is_raw_byte_mode(&self) -> bool {
567 self.raw_records.is_some()
568 }
569
570 #[must_use]
572 pub fn raw_r1(&self) -> Option<&[u8]> {
573 let rr = self.raw_records.as_ref()?;
574 self.r1.map(|_| rr[0].as_slice())
575 }
576
577 #[must_use]
579 pub fn raw_r2(&self) -> Option<&[u8]> {
580 let rr = self.raw_records.as_ref()?;
581 self.r2.map(|(i, _)| rr[i].as_slice())
582 }
583
584 #[must_use]
586 pub fn all_raw_records(&self) -> Option<&[Vec<u8>]> {
587 self.raw_records.as_deref()
588 }
589
590 #[must_use]
592 pub fn into_raw_records(self) -> Option<Vec<Vec<u8>>> {
593 self.raw_records
594 }
595
596 pub fn all_raw_records_mut(&mut self) -> Option<&mut [Vec<u8>]> {
598 self.raw_records.as_deref_mut()
599 }
600
601 #[allow(clippy::too_many_lines)]
611 pub fn from_raw_records(mut raw_records: Vec<Vec<u8>>) -> Result<Self> {
612 use crate::sort::bam_fields;
613
614 if raw_records.is_empty() {
615 bail!("No records given to from_raw_records");
616 }
617
618 for (i, r) in raw_records.iter().enumerate() {
620 if r.len() < bam_fields::MIN_BAM_RECORD_LEN {
621 bail!(
622 "Raw BAM record {i} too short to parse ({} < {})",
623 r.len(),
624 bam_fields::MIN_BAM_RECORD_LEN
625 );
626 }
627 let l_rn = r[8] as usize;
628 if r.len() < 32 + l_rn {
629 bail!(
630 "Raw BAM record {i} truncated: l_read_name={l_rn} but only {} bytes after header",
631 r.len() - 32
632 );
633 }
634 }
635
636 let name = bam_fields::read_name(&raw_records[0]).to_vec();
638
639 if raw_records.len() == 2 {
641 let f0 = bam_fields::flags(&raw_records[0]);
642 let f1 = bam_fields::flags(&raw_records[1]);
643 let neither_sec_supp =
644 (f0 | f1) & (bam_fields::flags::SECONDARY | bam_fields::flags::SUPPLEMENTARY) == 0;
645 if neither_sec_supp {
646 let is_r1_0 = (f0 & bam_fields::flags::PAIRED) == 0
648 || (f0 & bam_fields::flags::FIRST_SEGMENT) != 0;
649 let is_r1_1 = (f1 & bam_fields::flags::PAIRED) == 0
650 || (f1 & bam_fields::flags::FIRST_SEGMENT) != 0;
651 if is_r1_0 != is_r1_1 {
653 if !is_r1_0 {
654 raw_records.swap(0, 1);
656 }
657 return Ok(Template {
658 name,
659 records: Vec::new(),
660 raw_records: Some(raw_records),
661 r1: Some((0, 1)),
662 r2: Some((1, 2)),
663 r1_supplementals: None,
664 r2_supplementals: None,
665 r1_secondaries: None,
666 r2_secondaries: None,
667 mi: MoleculeId::None,
668 });
669 }
670 }
672 }
673
674 for rec in raw_records.iter().skip(1) {
676 if bam_fields::read_name(rec) != name.as_slice() {
677 bail!("Template name mismatch in from_raw_records");
678 }
679 }
680
681 let mut r1_idx: Option<usize> = None;
683 let mut r2_idx: Option<usize> = None;
684 let mut r1_supp: Vec<usize> = Vec::new();
685 let mut r2_supp: Vec<usize> = Vec::new();
686 let mut r1_sec: Vec<usize> = Vec::new();
687 let mut r2_sec: Vec<usize> = Vec::new();
688
689 for (i, rec) in raw_records.iter().enumerate() {
690 let flg = bam_fields::flags(rec);
691 let is_secondary = (flg & bam_fields::flags::SECONDARY) != 0;
692 let is_supplementary = (flg & bam_fields::flags::SUPPLEMENTARY) != 0;
693 let is_paired = (flg & bam_fields::flags::PAIRED) != 0;
694 let is_first = (flg & bam_fields::flags::FIRST_SEGMENT) != 0;
695 let is_r1 = !is_paired || is_first;
696
697 if is_r1 {
698 if is_secondary {
699 r1_sec.push(i);
700 } else if is_supplementary {
701 r1_supp.push(i);
702 } else if r1_idx.is_some() {
703 bail!(
704 "Multiple non-secondary, non-supplemental R1 records for read '{name:?}'"
705 );
706 } else {
707 r1_idx = Some(i);
708 }
709 } else if is_secondary {
710 r2_sec.push(i);
711 } else if is_supplementary {
712 r2_supp.push(i);
713 } else if r2_idx.is_some() {
714 bail!("Multiple non-secondary, non-supplemental R2 records for read '{name:?}'");
715 } else {
716 r2_idx = Some(i);
717 }
718 }
719
720 let mut ordered: Vec<Vec<u8>> = Vec::with_capacity(raw_records.len());
723 let mut take = |idx: usize| -> Vec<u8> { std::mem::take(&mut raw_records[idx]) };
724
725 let r1_pair = if let Some(idx) = r1_idx {
727 ordered.push(take(idx));
728 Some((ordered.len() - 1, ordered.len()))
729 } else {
730 None
731 };
732
733 let r2_pair = if let Some(idx) = r2_idx {
734 ordered.push(take(idx));
735 Some((ordered.len() - 1, ordered.len()))
736 } else {
737 None
738 };
739
740 let r1_supp_pair = if r1_supp.is_empty() {
741 None
742 } else {
743 r1_supp.reverse();
744 let start = ordered.len();
745 for idx in &r1_supp {
746 ordered.push(take(*idx));
747 }
748 Some((start, ordered.len()))
749 };
750
751 let r2_supp_pair = if r2_supp.is_empty() {
752 None
753 } else {
754 r2_supp.reverse();
755 let start = ordered.len();
756 for idx in &r2_supp {
757 ordered.push(take(*idx));
758 }
759 Some((start, ordered.len()))
760 };
761
762 let r1_sec_pair = if r1_sec.is_empty() {
763 None
764 } else {
765 r1_sec.reverse();
766 let start = ordered.len();
767 for idx in &r1_sec {
768 ordered.push(take(*idx));
769 }
770 Some((start, ordered.len()))
771 };
772
773 let r2_sec_pair = if r2_sec.is_empty() {
774 None
775 } else {
776 r2_sec.reverse();
777 let start = ordered.len();
778 for idx in &r2_sec {
779 ordered.push(take(*idx));
780 }
781 Some((start, ordered.len()))
782 };
783
784 Ok(Template {
785 name,
786 records: Vec::new(),
787 raw_records: Some(ordered),
788 r1: r1_pair,
789 r2: r2_pair,
790 r1_supplementals: r1_supp_pair,
791 r2_supplementals: r2_supp_pair,
792 r1_secondaries: r1_sec_pair,
793 r2_secondaries: r2_sec_pair,
794 mi: MoleculeId::None,
795 })
796 }
797
798 #[must_use]
822 pub fn pair_orientation(&self) -> Option<PairOrientation> {
823 let r1 = self.r1()?;
824 let r2 = self.r2()?;
825
826 if r1.flags().is_unmapped() || r2.flags().is_unmapped() {
828 return None;
829 }
830
831 if r1.reference_sequence_id() != r2.reference_sequence_id() {
833 return None;
834 }
835
836 Some(get_pair_orientation(r1))
838 }
839
840 #[allow(clippy::too_many_lines)]
860 pub fn fix_mate_info(&mut self) -> Result<()> {
861 if self.records.is_empty() {
862 return Ok(());
863 }
864
865 let mapq_tag = Tag::new(b'M', b'Q');
866 let mate_cigar_tag = Tag::new(b'M', b'C');
867 let mate_score_tag = Tag::new(b'm', b's');
868 let align_score_tag = Tag::new(b'A', b'S');
869
870 if let (Some((r1_i, _)), Some((r2_i, _))) = (self.r1, self.r2) {
873 let r1_is_unmapped = self.records[r1_i].flags().is_unmapped();
874 let r2_is_unmapped = self.records[r2_i].flags().is_unmapped();
875
876 let r1_as = self.records[r1_i].data().get(&align_score_tag).and_then(extract_int_value);
878 let r2_as = self.records[r2_i].data().get(&align_score_tag).and_then(extract_int_value);
879
880 if !r1_is_unmapped && !r2_is_unmapped {
881 self.set_mate_info_both_mapped(r1_i, r2_i, mapq_tag, mate_cigar_tag)?;
883 } else if r1_is_unmapped && r2_is_unmapped {
884 self.set_mate_info_both_unmapped(r1_i, r2_i, mapq_tag, mate_cigar_tag);
886 } else {
887 self.set_mate_info_one_unmapped(
889 r1_i,
890 r2_i,
891 r1_is_unmapped,
892 mapq_tag,
893 mate_cigar_tag,
894 )?;
895 }
896
897 if let Some(as_value) = r2_as {
900 self.records[r1_i]
901 .data_mut()
902 .insert(mate_score_tag, to_smallest_signed_int(as_value));
903 }
904 if let Some(as_value) = r1_as {
905 self.records[r2_i]
906 .data_mut()
907 .insert(mate_score_tag, to_smallest_signed_int(as_value));
908 }
909 }
910
911 if let Some((r2_i, _)) = self.r2 {
922 let r2_ref_id = self.records[r2_i].reference_sequence_id();
923 let r2_pos = self.records[r2_i].alignment_start();
924 let r2_is_reverse = self.records[r2_i].flags().is_reverse_complemented();
925 let r2_is_unmapped = self.records[r2_i].flags().is_unmapped();
926 let r2_tlen = self.records[r2_i].template_length();
927 let r2_mapq = self.records[r2_i].mapping_quality();
928 let r2_cigar_str = cigar_to_string(self.records[r2_i].cigar())?;
929 let r2_as = self.records[r2_i].data().get(&align_score_tag).and_then(extract_int_value);
930
931 if let Some((start, end)) = self.r1_supplementals {
932 for i in start..end {
933 *self.records[i].mate_reference_sequence_id_mut() = r2_ref_id;
935 *self.records[i].mate_alignment_start_mut() = r2_pos;
936
937 set_mate_flags(&mut self.records[i], r2_is_reverse, r2_is_unmapped);
939
940 *self.records[i].template_length_mut() = -r2_tlen;
942
943 let r2_mapq_value = r2_mapq.map_or(255, |m| i32::from(u8::from(m)));
945 self.records[i]
946 .data_mut()
947 .insert(mapq_tag, to_smallest_signed_int(r2_mapq_value));
948
949 if !r2_cigar_str.is_empty() && r2_cigar_str != "*" && !r2_is_unmapped {
951 self.records[i]
952 .data_mut()
953 .insert(mate_cigar_tag, BufValue::from(r2_cigar_str.clone()));
954 }
955
956 if let Some(as_value) = r2_as {
958 self.records[i]
959 .data_mut()
960 .insert(mate_score_tag, to_smallest_signed_int(as_value));
961 }
962 }
963 }
964 }
965
966 if let Some((r1_i, _)) = self.r1 {
968 let r1_ref_id = self.records[r1_i].reference_sequence_id();
969 let r1_pos = self.records[r1_i].alignment_start();
970 let r1_is_reverse = self.records[r1_i].flags().is_reverse_complemented();
971 let r1_is_unmapped = self.records[r1_i].flags().is_unmapped();
972 let r1_tlen = self.records[r1_i].template_length();
973 let r1_mapq = self.records[r1_i].mapping_quality();
974 let r1_cigar_str = cigar_to_string(self.records[r1_i].cigar())?;
975 let r1_as = self.records[r1_i].data().get(&align_score_tag).and_then(extract_int_value);
976
977 if let Some((start, end)) = self.r2_supplementals {
978 for i in start..end {
979 *self.records[i].mate_reference_sequence_id_mut() = r1_ref_id;
981 *self.records[i].mate_alignment_start_mut() = r1_pos;
982
983 set_mate_flags(&mut self.records[i], r1_is_reverse, r1_is_unmapped);
985
986 *self.records[i].template_length_mut() = -r1_tlen;
988
989 let r1_mapq_value = r1_mapq.map_or(255, |m| i32::from(u8::from(m)));
991 self.records[i]
992 .data_mut()
993 .insert(mapq_tag, to_smallest_signed_int(r1_mapq_value));
994
995 if !r1_cigar_str.is_empty() && r1_cigar_str != "*" && !r1_is_unmapped {
997 self.records[i]
998 .data_mut()
999 .insert(mate_cigar_tag, BufValue::from(r1_cigar_str.clone()));
1000 }
1001
1002 if let Some(as_value) = r1_as {
1004 self.records[i]
1005 .data_mut()
1006 .insert(mate_score_tag, to_smallest_signed_int(as_value));
1007 }
1008 }
1009 }
1010 }
1011
1012 Ok(())
1013 }
1014
1015 fn set_mate_info_both_mapped(
1018 &mut self,
1019 r1_i: usize,
1020 r2_i: usize,
1021 mapq_tag: Tag,
1022 mate_cigar_tag: Tag,
1023 ) -> Result<()> {
1024 let r2_ref_id = self.records[r2_i].reference_sequence_id();
1026 let r2_pos = self.records[r2_i].alignment_start();
1027 let r2_is_reverse = self.records[r2_i].flags().is_reverse_complemented();
1028 let r2_mapq = self.records[r2_i].mapping_quality();
1029 let r2_cigar_str = cigar_to_string(self.records[r2_i].cigar())?;
1030
1031 let r1_ref_id = self.records[r1_i].reference_sequence_id();
1033 let r1_pos = self.records[r1_i].alignment_start();
1034 let r1_is_reverse = self.records[r1_i].flags().is_reverse_complemented();
1035 let r1_mapq = self.records[r1_i].mapping_quality();
1036 let r1_cigar_str = cigar_to_string(self.records[r1_i].cigar())?;
1037
1038 *self.records[r1_i].mate_reference_sequence_id_mut() = r2_ref_id;
1040 *self.records[r1_i].mate_alignment_start_mut() = r2_pos;
1041 set_mate_flags(&mut self.records[r1_i], r2_is_reverse, false);
1042 let r2_mapq_value = r2_mapq.map_or(255, |m| i32::from(u8::from(m)));
1044 self.records[r1_i].data_mut().insert(mapq_tag, to_smallest_signed_int(r2_mapq_value));
1045 if !r2_cigar_str.is_empty() && r2_cigar_str != "*" {
1046 self.records[r1_i].data_mut().insert(mate_cigar_tag, BufValue::from(r2_cigar_str));
1047 }
1048
1049 *self.records[r2_i].mate_reference_sequence_id_mut() = r1_ref_id;
1051 *self.records[r2_i].mate_alignment_start_mut() = r1_pos;
1052 set_mate_flags(&mut self.records[r2_i], r1_is_reverse, false);
1053 let r1_mapq_value = r1_mapq.map_or(255, |m| i32::from(u8::from(m)));
1055 self.records[r2_i].data_mut().insert(mapq_tag, to_smallest_signed_int(r1_mapq_value));
1056 if !r1_cigar_str.is_empty() && r1_cigar_str != "*" {
1057 self.records[r2_i].data_mut().insert(mate_cigar_tag, BufValue::from(r1_cigar_str));
1058 }
1059
1060 let insert_size = compute_insert_size(&self.records[r1_i], &self.records[r2_i]);
1062 *self.records[r1_i].template_length_mut() = insert_size;
1063 *self.records[r2_i].template_length_mut() = -insert_size;
1064
1065 Ok(())
1066 }
1067
1068 fn set_mate_info_both_unmapped(
1071 &mut self,
1072 r1_i: usize,
1073 r2_i: usize,
1074 mapq_tag: Tag,
1075 mate_cigar_tag: Tag,
1076 ) {
1077 let r1_is_reverse = self.records[r1_i].flags().is_reverse_complemented();
1079 let r2_is_reverse = self.records[r2_i].flags().is_reverse_complemented();
1080
1081 *self.records[r1_i].reference_sequence_id_mut() = None;
1083 *self.records[r1_i].alignment_start_mut() = None;
1084 *self.records[r1_i].mate_reference_sequence_id_mut() = None;
1085 *self.records[r1_i].mate_alignment_start_mut() = None;
1086 set_mate_flags(&mut self.records[r1_i], r2_is_reverse, true);
1087 self.records[r1_i].data_mut().remove(&mapq_tag);
1088 self.records[r1_i].data_mut().remove(&mate_cigar_tag);
1089 *self.records[r1_i].template_length_mut() = 0;
1090
1091 *self.records[r2_i].reference_sequence_id_mut() = None;
1093 *self.records[r2_i].alignment_start_mut() = None;
1094 *self.records[r2_i].mate_reference_sequence_id_mut() = None;
1095 *self.records[r2_i].mate_alignment_start_mut() = None;
1096 set_mate_flags(&mut self.records[r2_i], r1_is_reverse, true);
1097 self.records[r2_i].data_mut().remove(&mapq_tag);
1098 self.records[r2_i].data_mut().remove(&mate_cigar_tag);
1099 *self.records[r2_i].template_length_mut() = 0;
1100 }
1101
1102 fn set_mate_info_one_unmapped(
1105 &mut self,
1106 r1_i: usize,
1107 r2_i: usize,
1108 r1_is_unmapped: bool,
1109 mapq_tag: Tag,
1110 mate_cigar_tag: Tag,
1111 ) -> Result<()> {
1112 let (mapped_i, unmapped_i) = if r1_is_unmapped { (r2_i, r1_i) } else { (r1_i, r2_i) };
1113
1114 let mapped_ref_id = self.records[mapped_i].reference_sequence_id();
1116 let mapped_pos = self.records[mapped_i].alignment_start();
1117 let mapped_is_reverse = self.records[mapped_i].flags().is_reverse_complemented();
1118 let mapped_mapq = self.records[mapped_i].mapping_quality();
1119 let mapped_cigar_str = cigar_to_string(self.records[mapped_i].cigar())?;
1120
1121 let unmapped_is_reverse = self.records[unmapped_i].flags().is_reverse_complemented();
1123
1124 *self.records[unmapped_i].reference_sequence_id_mut() = mapped_ref_id;
1126 *self.records[unmapped_i].alignment_start_mut() = mapped_pos;
1127
1128 *self.records[mapped_i].mate_reference_sequence_id_mut() = mapped_ref_id;
1131 *self.records[mapped_i].mate_alignment_start_mut() = mapped_pos;
1132 set_mate_flags(&mut self.records[mapped_i], unmapped_is_reverse, true);
1133 self.records[mapped_i].data_mut().remove(&mapq_tag);
1134 self.records[mapped_i].data_mut().remove(&mate_cigar_tag);
1135 *self.records[mapped_i].template_length_mut() = 0;
1136
1137 *self.records[unmapped_i].mate_reference_sequence_id_mut() = mapped_ref_id;
1139 *self.records[unmapped_i].mate_alignment_start_mut() = mapped_pos;
1140 set_mate_flags(&mut self.records[unmapped_i], mapped_is_reverse, false);
1141 let mapped_mapq_value = mapped_mapq.map_or(255, |m| i32::from(u8::from(m)));
1143 self.records[unmapped_i]
1144 .data_mut()
1145 .insert(mapq_tag, to_smallest_signed_int(mapped_mapq_value));
1146 if !mapped_cigar_str.is_empty() && mapped_cigar_str != "*" {
1147 self.records[unmapped_i]
1148 .data_mut()
1149 .insert(mate_cigar_tag, BufValue::from(mapped_cigar_str));
1150 }
1151 *self.records[unmapped_i].template_length_mut() = 0;
1152
1153 Ok(())
1154 }
1155}
1156
1157fn compute_insert_size(rec1: &RecordBuf, rec2: &RecordBuf) -> i32 {
1166 if rec1.flags().is_unmapped() || rec2.flags().is_unmapped() {
1168 return 0;
1169 }
1170
1171 if rec1.reference_sequence_id() != rec2.reference_sequence_id() {
1173 return 0;
1174 }
1175
1176 let pos1 = rec1.alignment_start().map_or(0, |p| i32::try_from(usize::from(p)).unwrap_or(0));
1178 let pos2 = rec2.alignment_start().map_or(0, |p| i32::try_from(usize::from(p)).unwrap_or(0));
1179
1180 let end1 = pos1 + alignment_length(rec1.cigar()) - 1;
1182 let end2 = pos2 + alignment_length(rec2.cigar()) - 1;
1183
1184 let first_5prime = if rec1.flags().is_reverse_complemented() { end1 } else { pos1 };
1187 let second_5prime = if rec2.flags().is_reverse_complemented() { end2 } else { pos2 };
1188
1189 let adjustment = if second_5prime >= first_5prime { 1 } else { -1 };
1191 second_5prime - first_5prime + adjustment
1192}
1193
1194fn alignment_length(cigar: &noodles::sam::alignment::record_buf::Cigar) -> i32 {
1196 use noodles::sam::alignment::record::cigar::op::Kind;
1197 let mut len = 0i32;
1198 for op in cigar.iter().flatten() {
1199 match op.kind() {
1200 Kind::Match
1201 | Kind::Deletion
1202 | Kind::Skip
1203 | Kind::SequenceMatch
1204 | Kind::SequenceMismatch => {
1205 len += i32::try_from(op.len()).unwrap_or(0);
1206 }
1207 _ => {}
1208 }
1209 }
1210 len
1211}
1212
1213#[allow(clippy::cast_possible_wrap)]
1227fn get_pair_orientation(record: &RecordBuf) -> PairOrientation {
1228 let is_reverse = record.flags().is_reverse_complemented();
1229 let mate_reverse = record.flags().is_mate_reverse_complemented();
1230
1231 if is_reverse == mate_reverse {
1233 return PairOrientation::Tandem;
1234 }
1235
1236 let alignment_start = record.alignment_start().map_or(0, usize::from);
1242 let mate_start = record.mate_alignment_start().map_or(0, usize::from);
1243 let insert_size = record.template_length();
1244
1245 let (positive_five_prime, negative_five_prime) = if is_reverse {
1246 let end = record_utils::alignment_end(record).unwrap_or(alignment_start);
1250 (mate_start as i64, end as i64)
1251 } else {
1252 (alignment_start as i64, alignment_start as i64 + i64::from(insert_size))
1256 };
1257
1258 if positive_five_prime < negative_five_prime {
1260 PairOrientation::FR
1261 } else {
1262 PairOrientation::RF
1263 }
1264}
1265
1266fn set_mate_flags(record: &mut RecordBuf, mate_is_reverse: bool, mate_is_unmapped: bool) {
1273 use noodles::sam::alignment::record::Flags;
1274
1275 let mut flags = record.flags();
1276
1277 flags.remove(Flags::MATE_REVERSE_COMPLEMENTED);
1279 if mate_is_reverse {
1280 flags.insert(Flags::MATE_REVERSE_COMPLEMENTED);
1281 }
1282
1283 flags.remove(Flags::MATE_UNMAPPED);
1285 if mate_is_unmapped {
1286 flags.insert(Flags::MATE_UNMAPPED);
1287 }
1288
1289 *record.flags_mut() = flags;
1290}
1291
1292fn extract_int_value(value: &BufValue) -> Option<i32> {
1305 match value {
1306 BufValue::Int8(i) => Some(i32::from(*i)),
1307 BufValue::Int16(i) => Some(i32::from(*i)),
1308 BufValue::Int32(i) => Some(*i),
1309 BufValue::UInt8(i) => Some(i32::from(*i)),
1310 BufValue::UInt16(i) => Some(i32::from(*i)),
1311 BufValue::UInt32(i) => i32::try_from(*i).ok(),
1312 _ => None,
1313 }
1314}
1315
1316fn kind_to_char(kind: noodles::sam::alignment::record::cigar::op::Kind) -> char {
1341 use noodles::sam::alignment::record::cigar::op::Kind;
1342 match kind {
1343 Kind::Match => 'M',
1344 Kind::Insertion => 'I',
1345 Kind::Deletion => 'D',
1346 Kind::Skip => 'N',
1347 Kind::SoftClip => 'S',
1348 Kind::HardClip => 'H',
1349 Kind::Pad => 'P',
1350 Kind::SequenceMatch => '=',
1351 Kind::SequenceMismatch => 'X',
1352 }
1353}
1354
1355fn cigar_to_string(cigar: &noodles::sam::alignment::record_buf::Cigar) -> Result<String> {
1369 if cigar.is_empty() {
1370 return Ok(String::from("*"));
1371 }
1372
1373 let mut result = String::with_capacity(cigar.as_ref().len() * 4);
1375 for op in cigar.iter() {
1376 let op = op?;
1377 let _ = write!(result, "{}{}", op.len(), kind_to_char(op.kind()));
1378 }
1379 Ok(result)
1380}
1381
1382pub struct TemplateIterator<I>
1405where
1406 I: Iterator<Item = Result<RecordBuf>>,
1407{
1408 record_iter: I,
1409 builder: Builder,
1410}
1411
1412impl<I> TemplateIterator<I>
1413where
1414 I: Iterator<Item = Result<RecordBuf>>,
1415{
1416 pub fn new(record_iter: I) -> Self {
1426 TemplateIterator { record_iter, builder: Builder::new() }
1427 }
1428}
1429
1430impl<I> Iterator for TemplateIterator<I>
1431where
1432 I: Iterator<Item = Result<RecordBuf>>,
1433{
1434 type Item = Result<Template>;
1435
1436 fn next(&mut self) -> Option<Self::Item> {
1447 loop {
1448 match self.record_iter.next() {
1449 None => {
1450 if self.builder.is_empty() {
1452 return None;
1453 }
1454 return Some(self.builder.build());
1455 }
1456 Some(Err(e)) => {
1457 return Some(Err(e));
1459 }
1460 Some(Ok(record)) => {
1461 let name: Vec<u8> = if let Some(n) = record.name() {
1462 Vec::from(<_ as AsRef<[u8]>>::as_ref(n))
1463 } else {
1464 Vec::new()
1465 };
1466
1467 if self.builder.name.iter().any(|n| *n == name) || self.builder.is_empty() {
1468 if let Err(e) = self.builder.push(record) {
1469 return Some(Err(e));
1470 }
1471 } else {
1472 let template: std::result::Result<Template, anyhow::Error> =
1473 self.builder.build();
1474 if let Err(e) = self.builder.push(record) {
1475 return Some(Err(e));
1476 }
1477 return Some(template);
1478 }
1479 }
1480 }
1481 }
1482 }
1483}
1484
1485impl MemoryEstimate for Template {
1490 fn estimate_heap_size(&self) -> usize {
1491 let name_size = self.name.capacity();
1493
1494 let records_size: usize = self.records.iter().map(estimate_record_buf_heap_size).sum();
1502 let records_vec_overhead = self.records.capacity() * std::mem::size_of::<RecordBuf>();
1503
1504 let raw_records_size = self.raw_records.as_ref().map_or(0, |rr| {
1505 rr.iter().map(Vec::capacity).sum::<usize>()
1506 + rr.capacity() * std::mem::size_of::<Vec<u8>>()
1507 });
1508
1509 name_size + records_size + records_vec_overhead + raw_records_size
1510 }
1511}
1512
1513impl MemoryEstimate for TemplateBatch {
1514 fn estimate_heap_size(&self) -> usize {
1515 self.iter().map(MemoryEstimate::estimate_heap_size).sum::<usize>()
1516 + self.capacity() * std::mem::size_of::<Template>()
1517 }
1518}
1519
1520#[must_use]
1532pub fn estimate_record_buf_heap_size(record: &RecordBuf) -> usize {
1533 let name_size = record.name().map_or(0, |n| n.len());
1537
1538 let seq_len = record.sequence().len();
1541
1542 let qual_len = record.quality_scores().len();
1544
1545 let cigar_ops = record.cigar().as_ref().len();
1547 let cigar_size = cigar_ops * 4;
1548
1549 let data_fields = record.data().iter().count();
1559 let entry_capacity = (data_fields * 115) / 100 + 1; let entries_size = data_fields * 48; let hash_table_size = entry_capacity * 16; let value_heap_size = data_fields * 16; let data_size = entries_size + hash_table_size + value_heap_size;
1564
1565 name_size + seq_len + qual_len + cigar_size + data_size
1568}
1569
1570#[cfg(test)]
1571#[allow(clippy::similar_names)]
1572mod tests {
1573 use super::*;
1574 use crate::sam::builder::RecordBuilder;
1575 use noodles::sam::alignment::record::Flags;
1576 use noodles::sam::alignment::record::cigar::Op;
1577 use noodles::sam::alignment::record::cigar::op::Kind;
1578 use noodles::sam::alignment::record_buf::Cigar as CigarBuf;
1579
1580 fn create_test_record(name: &[u8], flags: u16) -> RecordBuf {
1581 RecordBuilder::new()
1582 .name(std::str::from_utf8(name).unwrap())
1583 .flags(Flags::from(flags))
1584 .build()
1585 }
1586
1587 const FLAG_PAIRED: u16 = 0x1;
1589 const FLAG_READ1: u16 = 0x40;
1590 const FLAG_READ2: u16 = 0x80;
1591 const FLAG_SECONDARY: u16 = 0x100;
1592 const FLAG_SUPPLEMENTARY: u16 = 0x800;
1593
1594 #[test]
1595 fn test_template_new() {
1596 let template = Template::new(b"read1".to_vec());
1597 assert_eq!(template.name, b"read1");
1598 assert_eq!(template.read_count(), 0);
1599 }
1600
1601 #[test]
1602 fn test_builder_empty() {
1603 let builder = Builder::new();
1604 assert!(builder.is_empty());
1605 assert_eq!(builder.len(), 0);
1606 }
1607
1608 #[test]
1609 fn test_builder_push_r1_only() -> Result<()> {
1610 let mut builder = Builder::new();
1611 let r1 = create_test_record(b"read1", 0);
1612 builder.push(r1)?;
1613 assert_eq!(builder.len(), 1);
1614 assert!(!builder.is_empty());
1615 Ok(())
1616 }
1617
1618 #[test]
1619 fn test_builder_push_paired_end() -> Result<()> {
1620 let mut builder = Builder::new();
1621 let r1 = create_test_record(b"read1", FLAG_PAIRED | FLAG_READ1);
1622 let r2 = create_test_record(b"read1", FLAG_PAIRED | FLAG_READ2);
1623 builder.push(r1)?;
1624 builder.push(r2)?;
1625 assert_eq!(builder.len(), 2);
1626 Ok(())
1627 }
1628
1629 #[test]
1630 fn test_builder_push_supplementary() -> Result<()> {
1631 let mut builder = Builder::new();
1632 let supp = create_test_record(b"read1", FLAG_SUPPLEMENTARY);
1633 builder.push(supp)?;
1634 assert_eq!(builder.len(), 1);
1635 Ok(())
1636 }
1637
1638 #[test]
1639 fn test_builder_push_secondary() -> Result<()> {
1640 let mut builder = Builder::new();
1641 let sec = create_test_record(b"read1", FLAG_SECONDARY);
1642 builder.push(sec)?;
1643 assert_eq!(builder.len(), 1);
1644 Ok(())
1645 }
1646
1647 #[test]
1648 fn test_builder_error_on_name_mismatch() {
1649 let mut builder = Builder::new();
1650 let r1 = create_test_record(b"read1", 0);
1651 let r2 = create_test_record(b"read2", 0);
1652 builder.push(r1).unwrap();
1653 let result = builder.push(r2);
1654 assert!(result.is_err());
1655 assert!(result.unwrap_err().to_string().contains("mismatch"));
1656 }
1657
1658 #[test]
1659 fn test_builder_error_on_multiple_r1() {
1660 let mut builder = Builder::new();
1661 let r1a = create_test_record(b"read1", 0);
1662 let r1b = create_test_record(b"read1", 0);
1663 builder.push(r1a).unwrap();
1664 let result = builder.push(r1b);
1665 assert!(result.is_err());
1666 assert!(result.unwrap_err().to_string().contains("Multiple non-secondary"));
1667 }
1668
1669 #[test]
1670 fn test_builder_error_on_multiple_r2() {
1671 let mut builder = Builder::new();
1672 let r2a = create_test_record(b"read1", FLAG_PAIRED | FLAG_READ2);
1673 let r2b = create_test_record(b"read1", FLAG_PAIRED | FLAG_READ2);
1674 builder.push(r2a).unwrap();
1675 let result = builder.push(r2b);
1676 assert!(result.is_err());
1677 assert!(result.unwrap_err().to_string().contains("Multiple non-secondary"));
1678 }
1679
1680 #[test]
1681 fn test_builder_build_empty_error() {
1682 let mut builder = Builder::new();
1683 let result = builder.build();
1684 assert!(result.is_err());
1685 assert!(result.unwrap_err().to_string().contains("No records"));
1686 }
1687
1688 #[test]
1689 fn test_builder_build_success() -> Result<()> {
1690 let mut builder = Builder::new();
1691 let r1 = create_test_record(b"read1", 0);
1692 builder.push(r1)?;
1693 let template = builder.build()?;
1694 assert_eq!(template.name, b"read1");
1695 assert_eq!(template.read_count(), 1);
1696 assert!(template.r1().is_some());
1697 Ok(())
1698 }
1699
1700 #[test]
1701 fn test_template_from_records() -> Result<()> {
1702 let records = vec![
1703 create_test_record(b"read1", FLAG_PAIRED | FLAG_READ1),
1704 create_test_record(b"read1", FLAG_PAIRED | FLAG_READ2),
1705 ];
1706 let template = Template::from_records(records)?;
1707 assert_eq!(template.read_count(), 2);
1708 assert!(template.r1().is_some());
1709 assert!(template.r2().is_some());
1710 Ok(())
1711 }
1712
1713 #[test]
1714 fn test_template_primary_reads() -> Result<()> {
1715 let mut builder = Builder::new();
1716 builder.push(create_test_record(b"read1", FLAG_PAIRED | FLAG_READ1))?;
1717 builder.push(create_test_record(b"read1", FLAG_PAIRED | FLAG_READ2))?;
1718 let template = builder.build()?;
1719 let primaries: Vec<_> = template.primary_reads().collect();
1720 assert_eq!(primaries.len(), 2);
1721 Ok(())
1722 }
1723
1724 #[test]
1725 fn test_template_into_primary_reads() -> Result<()> {
1726 let records = vec![
1727 create_test_record(b"read1", FLAG_PAIRED | FLAG_READ1),
1728 create_test_record(b"read1", FLAG_PAIRED | FLAG_READ2),
1729 ];
1730 let template = Template::from_records(records)?;
1731
1732 assert!(template.r1().is_some());
1734 assert!(template.r2().is_some());
1735
1736 let (r1, r2) = template.into_primary_reads();
1738
1739 assert!(r1.is_some());
1741 assert!(r2.is_some());
1742
1743 let r1 = r1.unwrap();
1745 let r2 = r2.unwrap();
1746 assert!(r1.flags().is_first_segment());
1747 assert!(r2.flags().is_last_segment());
1748
1749 Ok(())
1750 }
1751
1752 #[test]
1753 fn test_template_into_primary_reads_r1_only() -> Result<()> {
1754 let records = vec![create_test_record(b"read1", FLAG_PAIRED | FLAG_READ1)];
1755 let template = Template::from_records(records)?;
1756
1757 let (r1, r2) = template.into_primary_reads();
1758
1759 assert!(r1.is_some());
1760 assert!(r2.is_none());
1761
1762 Ok(())
1763 }
1764
1765 #[test]
1766 fn test_template_all_reads() -> Result<()> {
1767 let mut builder = Builder::new();
1768 builder.push(create_test_record(b"read1", 0))?;
1769 builder.push(create_test_record(b"read1", FLAG_SUPPLEMENTARY))?;
1770 builder.push(create_test_record(b"read1", FLAG_SECONDARY))?;
1771 let template = builder.build()?;
1772 let all: Vec<_> = template.all_reads().collect();
1773 assert_eq!(all.len(), 3);
1774 Ok(())
1775 }
1776
1777 #[test]
1778 fn test_template_all_r1s() -> Result<()> {
1779 let mut builder = Builder::new();
1780 builder.push(create_test_record(b"read1", FLAG_PAIRED | FLAG_READ1))?;
1781 builder
1782 .push(create_test_record(b"read1", FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY))?;
1783 let template = builder.build()?;
1784 let all_r1s: Vec<_> = template.all_r1s().collect();
1785 assert_eq!(all_r1s.len(), 2);
1786 Ok(())
1787 }
1788
1789 #[test]
1790 fn test_template_all_r2s() -> Result<()> {
1791 let mut builder = Builder::new();
1792 builder.push(create_test_record(b"read1", FLAG_PAIRED | FLAG_READ2))?;
1793 builder.push(create_test_record(b"read1", FLAG_PAIRED | FLAG_READ2 | FLAG_SECONDARY))?;
1794 let template = builder.build()?;
1795 let all_r2s: Vec<_> = template.all_r2s().collect();
1796 assert_eq!(all_r2s.len(), 2);
1797 Ok(())
1798 }
1799
1800 #[test]
1801 fn test_template_supplementals_and_secondaries() -> Result<()> {
1802 let mut builder = Builder::new();
1803 builder.push(create_test_record(b"read1", 0))?;
1804 builder.push(create_test_record(b"read1", FLAG_SUPPLEMENTARY))?;
1805 builder.push(create_test_record(b"read1", FLAG_SECONDARY))?;
1806 let template = builder.build()?;
1807 let non_primary: Vec<_> = template.all_supplementary_and_secondary().collect();
1808 assert_eq!(non_primary.len(), 2);
1809 Ok(())
1810 }
1811
1812 #[test]
1813 fn test_cigar_to_string() {
1814 use noodles::sam::alignment::record_buf::Cigar;
1815 let cigar = Cigar::default();
1816 assert_eq!(cigar_to_string(&cigar).unwrap(), "*");
1817 }
1818
1819 #[test]
1820 fn test_cigar_to_string_with_ops() {
1821 let ops =
1822 vec![Op::new(Kind::Match, 10), Op::new(Kind::Deletion, 2), Op::new(Kind::Match, 5)];
1823 let cigar = CigarBuf::from(ops);
1824 let result = cigar_to_string(&cigar).unwrap();
1825 assert_eq!(result, "10M2D5M");
1826 }
1827
1828 #[test]
1829 fn test_template_builder_reuse() -> Result<()> {
1830 let mut builder = Builder::new();
1831 builder.push(create_test_record(b"read1", 0))?;
1832 let template1 = builder.build()?;
1833 assert_eq!(template1.name, b"read1");
1834 builder.push(create_test_record(b"read2", 0))?;
1835 let template2 = builder.build()?;
1836 assert_eq!(template2.name, b"read2");
1837 Ok(())
1838 }
1839
1840 #[test]
1841 fn test_template_r1_supplementals() -> Result<()> {
1842 let mut builder = Builder::new();
1843 builder.push(create_test_record(b"read1", FLAG_PAIRED | FLAG_READ1))?;
1844 builder
1845 .push(create_test_record(b"read1", FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY))?;
1846 builder
1847 .push(create_test_record(b"read1", FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY))?;
1848 let template = builder.build()?;
1849 let supps = template.r1_supplementals();
1850 assert_eq!(supps.len(), 2);
1851 Ok(())
1852 }
1853
1854 #[test]
1855 fn test_template_r2_supplementals() -> Result<()> {
1856 let mut builder = Builder::new();
1857 builder.push(create_test_record(b"read1", FLAG_PAIRED | FLAG_READ2))?;
1858 builder
1859 .push(create_test_record(b"read1", FLAG_PAIRED | FLAG_READ2 | FLAG_SUPPLEMENTARY))?;
1860 let template = builder.build()?;
1861 let supps = template.r2_supplementals();
1862 assert_eq!(supps.len(), 1);
1863 Ok(())
1864 }
1865
1866 #[test]
1867 fn test_template_r1_secondaries() -> Result<()> {
1868 let mut builder = Builder::new();
1869 builder.push(create_test_record(b"read1", 0))?;
1870 builder.push(create_test_record(b"read1", FLAG_SECONDARY))?;
1871 let template = builder.build()?;
1872 let secs = template.r1_secondaries();
1873 assert_eq!(secs.len(), 1);
1874 Ok(())
1875 }
1876
1877 #[test]
1878 fn test_template_r2_secondaries() -> Result<()> {
1879 let mut builder = Builder::new();
1880 builder.push(create_test_record(b"read1", FLAG_PAIRED | FLAG_READ2))?;
1881 builder.push(create_test_record(b"read1", FLAG_PAIRED | FLAG_READ2 | FLAG_SECONDARY))?;
1882 builder.push(create_test_record(b"read1", FLAG_PAIRED | FLAG_READ2 | FLAG_SECONDARY))?;
1883 let template = builder.build()?;
1884 let secs = template.r2_secondaries();
1885 assert_eq!(secs.len(), 2);
1886 Ok(())
1887 }
1888
1889 #[test]
1891 fn test_extract_int_value_int8() {
1892 let value = BufValue::Int8(42);
1893 assert_eq!(extract_int_value(&value), Some(42));
1894 }
1895
1896 #[test]
1897 fn test_extract_int_value_int16() {
1898 let value = BufValue::Int16(1234);
1899 assert_eq!(extract_int_value(&value), Some(1234));
1900 }
1901
1902 #[test]
1903 fn test_extract_int_value_int32() {
1904 let value = BufValue::Int32(123_456);
1905 assert_eq!(extract_int_value(&value), Some(123_456));
1906 }
1907
1908 #[test]
1909 fn test_extract_int_value_uint8() {
1910 let value = BufValue::UInt8(255);
1911 assert_eq!(extract_int_value(&value), Some(255));
1912 }
1913
1914 #[test]
1915 fn test_extract_int_value_uint16() {
1916 let value = BufValue::UInt16(65535);
1917 assert_eq!(extract_int_value(&value), Some(65535));
1918 }
1919
1920 #[test]
1921 fn test_extract_int_value_uint32() {
1922 let value = BufValue::UInt32(100_000);
1923 assert_eq!(extract_int_value(&value), Some(100_000));
1924 }
1925
1926 #[test]
1927 fn test_extract_int_value_uint32_overflow() {
1928 let value = BufValue::UInt32(u32::MAX);
1930 assert_eq!(extract_int_value(&value), None);
1931 }
1932
1933 #[test]
1934 fn test_extract_int_value_string_returns_none() {
1935 let value = BufValue::String("test".into());
1936 assert_eq!(extract_int_value(&value), None);
1937 }
1938
1939 #[test]
1940 fn test_extract_int_value_negative() {
1941 let value = BufValue::Int8(-42);
1942 assert_eq!(extract_int_value(&value), Some(-42));
1943
1944 let value = BufValue::Int16(-1234);
1945 assert_eq!(extract_int_value(&value), Some(-1234));
1946
1947 let value = BufValue::Int32(-123_456);
1948 assert_eq!(extract_int_value(&value), Some(-123_456));
1949 }
1950
1951 fn create_mapped_record(name: &[u8], flags: u16, pos: usize, mapq: u8) -> RecordBuf {
1953 create_mapped_record_with_tlen(name, flags, pos, mapq, 0)
1954 }
1955
1956 fn create_mapped_record_with_tlen(
1958 name: &[u8],
1959 flags: u16,
1960 pos: usize,
1961 mapq: u8,
1962 tlen: i32,
1963 ) -> RecordBuf {
1964 let is_read1 = (flags & FLAG_READ1) != 0;
1965 let is_paired = (flags & FLAG_PAIRED) != 0;
1966
1967 let mut builder = RecordBuilder::new()
1968 .name(std::str::from_utf8(name).unwrap())
1969 .sequence(&"A".repeat(100))
1970 .qualities(&[30u8; 100])
1971 .cigar("100M")
1972 .reference_sequence_id(0)
1973 .alignment_start(pos)
1974 .mapping_quality(mapq)
1975 .template_length(tlen);
1976
1977 if is_paired {
1978 builder = builder.first_segment(is_read1);
1979 }
1980
1981 let mut record = builder.build();
1983 *record.flags_mut() = Flags::from(flags);
1985
1986 record
1987 }
1988
1989 #[test]
1992 fn test_fix_mate_info_sets_ms_tag_with_int8_as() -> Result<()> {
1993 let as_tag = Tag::new(b'A', b'S');
1994 let ms_tag = Tag::new(b'm', b's');
1995
1996 let mut r1 = create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ1, 100, 30);
1997 r1.data_mut().insert(as_tag, BufValue::Int8(55)); let mut r2 = create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ2, 200, 40);
2000 r2.data_mut().insert(as_tag, BufValue::Int8(44)); let mut builder = Builder::new();
2003 builder.push(r1)?;
2004 builder.push(r2)?;
2005 let mut template = builder.build()?;
2006
2007 template.fix_mate_info()?;
2008
2009 let r1_ms = template.records[0].data().get(&ms_tag);
2011 assert!(r1_ms.is_some(), "R1 should have ms tag");
2012 assert_eq!(extract_int_value(r1_ms.unwrap()), Some(44));
2013
2014 let r2_ms = template.records[1].data().get(&ms_tag);
2016 assert!(r2_ms.is_some(), "R2 should have ms tag");
2017 assert_eq!(extract_int_value(r2_ms.unwrap()), Some(55));
2018
2019 Ok(())
2020 }
2021
2022 #[test]
2024 fn test_fix_mate_info_sets_ms_tag_with_uint8_as() -> Result<()> {
2025 let as_tag = Tag::new(b'A', b'S');
2026 let ms_tag = Tag::new(b'm', b's');
2027
2028 let mut r1 = create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ1, 100, 30);
2029 r1.data_mut().insert(as_tag, BufValue::UInt8(77)); let mut r2 = create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ2, 200, 40);
2032 r2.data_mut().insert(as_tag, BufValue::UInt8(88)); let mut builder = Builder::new();
2035 builder.push(r1)?;
2036 builder.push(r2)?;
2037 let mut template = builder.build()?;
2038
2039 template.fix_mate_info()?;
2040
2041 let r1_ms = template.records[0].data().get(&ms_tag);
2043 assert!(r1_ms.is_some(), "R1 should have ms tag");
2044 assert_eq!(extract_int_value(r1_ms.unwrap()), Some(88));
2045
2046 let r2_ms = template.records[1].data().get(&ms_tag);
2048 assert!(r2_ms.is_some(), "R2 should have ms tag");
2049 assert_eq!(extract_int_value(r2_ms.unwrap()), Some(77));
2050
2051 Ok(())
2052 }
2053
2054 #[test]
2056 fn test_fix_mate_info_sets_ms_tag_with_int16_as() -> Result<()> {
2057 let as_tag = Tag::new(b'A', b'S');
2058 let ms_tag = Tag::new(b'm', b's');
2059
2060 let mut r1 = create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ1, 100, 30);
2061 r1.data_mut().insert(as_tag, BufValue::Int16(1000)); let mut r2 = create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ2, 200, 40);
2064 r2.data_mut().insert(as_tag, BufValue::Int16(2000)); let mut builder = Builder::new();
2067 builder.push(r1)?;
2068 builder.push(r2)?;
2069 let mut template = builder.build()?;
2070
2071 template.fix_mate_info()?;
2072
2073 let r1_ms = template.records[0].data().get(&ms_tag);
2075 assert!(r1_ms.is_some(), "R1 should have ms tag");
2076 assert_eq!(extract_int_value(r1_ms.unwrap()), Some(2000));
2077
2078 let r2_ms = template.records[1].data().get(&ms_tag);
2080 assert!(r2_ms.is_some(), "R2 should have ms tag");
2081 assert_eq!(extract_int_value(r2_ms.unwrap()), Some(1000));
2082
2083 Ok(())
2084 }
2085
2086 #[test]
2088 fn test_fix_mate_info_sets_ms_tag_with_int32_as() -> Result<()> {
2089 let as_tag = Tag::new(b'A', b'S');
2090 let ms_tag = Tag::new(b'm', b's');
2091
2092 let mut r1 = create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ1, 100, 30);
2093 r1.data_mut().insert(as_tag, BufValue::Int32(100_000)); let mut r2 = create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ2, 200, 40);
2096 r2.data_mut().insert(as_tag, BufValue::Int32(200_000)); let mut builder = Builder::new();
2099 builder.push(r1)?;
2100 builder.push(r2)?;
2101 let mut template = builder.build()?;
2102
2103 template.fix_mate_info()?;
2104
2105 let r1_ms = template.records[0].data().get(&ms_tag);
2107 assert!(r1_ms.is_some(), "R1 should have ms tag");
2108 assert_eq!(extract_int_value(r1_ms.unwrap()), Some(200_000));
2109
2110 let r2_ms = template.records[1].data().get(&ms_tag);
2112 assert!(r2_ms.is_some(), "R2 should have ms tag");
2113 assert_eq!(extract_int_value(r2_ms.unwrap()), Some(100_000));
2114
2115 Ok(())
2116 }
2117
2118 #[test]
2120 fn test_fix_mate_info_no_ms_tag_without_as() -> Result<()> {
2121 let ms_tag = Tag::new(b'm', b's');
2122
2123 let r1 = create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ1, 100, 30);
2124 let r2 = create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ2, 200, 40);
2125
2126 let mut builder = Builder::new();
2127 builder.push(r1)?;
2128 builder.push(r2)?;
2129 let mut template = builder.build()?;
2130
2131 template.fix_mate_info()?;
2132
2133 assert!(template.records[0].data().get(&ms_tag).is_none(), "R1 should not have ms tag");
2135 assert!(template.records[1].data().get(&ms_tag).is_none(), "R2 should not have ms tag");
2136
2137 Ok(())
2138 }
2139
2140 #[test]
2142 fn test_fix_mate_info_sets_ms_tag_on_supplementals() -> Result<()> {
2143 let as_tag = Tag::new(b'A', b'S');
2144 let ms_tag = Tag::new(b'm', b's');
2145
2146 let mut r1 = create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ1, 100, 30);
2147 r1.data_mut().insert(as_tag, BufValue::Int8(55));
2148
2149 let mut r2 = create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ2, 200, 40);
2150 r2.data_mut().insert(as_tag, BufValue::Int8(44));
2151
2152 let mut r1_supp =
2153 create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY, 300, 20);
2154 r1_supp.data_mut().insert(as_tag, BufValue::Int8(33));
2155
2156 let mut builder = Builder::new();
2157 builder.push(r1)?;
2158 builder.push(r2)?;
2159 builder.push(r1_supp)?;
2160 let mut template = builder.build()?;
2161
2162 template.fix_mate_info()?;
2163
2164 let r1_supp_ms = template.records[2].data().get(&ms_tag);
2166 assert!(r1_supp_ms.is_some(), "R1 supplementary should have ms tag");
2167 assert_eq!(extract_int_value(r1_supp_ms.unwrap()), Some(44));
2168
2169 Ok(())
2170 }
2171
2172 #[test]
2175 fn test_fix_mate_info_sets_tlen_on_r1_supplementals() -> Result<()> {
2176 let r1 = create_mapped_record_with_tlen(b"read1", FLAG_PAIRED | FLAG_READ1, 100, 30, 200);
2178 let r2 = create_mapped_record_with_tlen(b"read1", FLAG_PAIRED | FLAG_READ2, 200, 40, -200);
2180 let r1_supp = create_mapped_record_with_tlen(
2182 b"read1",
2183 FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY,
2184 300,
2185 20,
2186 0,
2187 );
2188
2189 let mut builder = Builder::new();
2190 builder.push(r1)?;
2191 builder.push(r2)?;
2192 builder.push(r1_supp)?;
2193 let mut template = builder.build()?;
2194
2195 template.fix_mate_info()?;
2196
2197 assert_eq!(
2200 template.records[2].template_length(),
2201 101,
2202 "R1 supplementary TLEN should be negative of R2's TLEN"
2203 );
2204
2205 Ok(())
2206 }
2207
2208 #[test]
2211 fn test_fix_mate_info_sets_tlen_on_r2_supplementals() -> Result<()> {
2212 let r1 = create_mapped_record_with_tlen(b"read1", FLAG_PAIRED | FLAG_READ1, 100, 30, 300);
2214 let r2 = create_mapped_record_with_tlen(b"read1", FLAG_PAIRED | FLAG_READ2, 200, 40, -300);
2216 let r2_supp = create_mapped_record_with_tlen(
2218 b"read1",
2219 FLAG_PAIRED | FLAG_READ2 | FLAG_SUPPLEMENTARY,
2220 400,
2221 25,
2222 0,
2223 );
2224
2225 let mut builder = Builder::new();
2226 builder.push(r1)?;
2227 builder.push(r2)?;
2228 builder.push(r2_supp)?;
2229 let mut template = builder.build()?;
2230
2231 template.fix_mate_info()?;
2232
2233 assert_eq!(
2236 template.records[2].template_length(),
2237 -101,
2238 "R2 supplementary TLEN should be negative of R1's TLEN"
2239 );
2240
2241 Ok(())
2242 }
2243
2244 #[test]
2246 fn test_fix_mate_info_sets_tlen_on_multiple_supplementals() -> Result<()> {
2247 let r1 = create_mapped_record_with_tlen(b"read1", FLAG_PAIRED | FLAG_READ1, 100, 30, 500);
2249 let r2 = create_mapped_record_with_tlen(b"read1", FLAG_PAIRED | FLAG_READ2, 200, 40, -500);
2251 let r1_supp1 = create_mapped_record_with_tlen(
2253 b"read1",
2254 FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY,
2255 300,
2256 20,
2257 0,
2258 );
2259 let r1_supp2 = create_mapped_record_with_tlen(
2260 b"read1",
2261 FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY,
2262 400,
2263 15,
2264 0,
2265 );
2266 let r2_supp1 = create_mapped_record_with_tlen(
2268 b"read1",
2269 FLAG_PAIRED | FLAG_READ2 | FLAG_SUPPLEMENTARY,
2270 500,
2271 25,
2272 0,
2273 );
2274 let r2_supp2 = create_mapped_record_with_tlen(
2275 b"read1",
2276 FLAG_PAIRED | FLAG_READ2 | FLAG_SUPPLEMENTARY,
2277 600,
2278 10,
2279 0,
2280 );
2281
2282 let mut builder = Builder::new();
2283 builder.push(r1)?;
2284 builder.push(r2)?;
2285 builder.push(r1_supp1)?;
2286 builder.push(r1_supp2)?;
2287 builder.push(r2_supp1)?;
2288 builder.push(r2_supp2)?;
2289 let mut template = builder.build()?;
2290
2291 template.fix_mate_info()?;
2292
2293 assert_eq!(template.records[2].template_length(), 101, "R1 supp1 TLEN");
2298 assert_eq!(template.records[3].template_length(), 101, "R1 supp2 TLEN");
2299
2300 assert_eq!(template.records[4].template_length(), -101, "R2 supp1 TLEN");
2302 assert_eq!(template.records[5].template_length(), -101, "R2 supp2 TLEN");
2303
2304 Ok(())
2305 }
2306
2307 #[test]
2309 fn test_fix_mate_info_tlen_recalculated() -> Result<()> {
2310 let r1 = create_mapped_record_with_tlen(b"read1", FLAG_PAIRED | FLAG_READ1, 100, 30, 0);
2313 let r2 = create_mapped_record_with_tlen(b"read1", FLAG_PAIRED | FLAG_READ2, 200, 40, 0);
2314 let r1_supp = create_mapped_record_with_tlen(
2315 b"read1",
2316 FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY,
2317 300,
2318 20,
2319 999, );
2321
2322 let mut builder = Builder::new();
2323 builder.push(r1)?;
2324 builder.push(r2)?;
2325 builder.push(r1_supp)?;
2326 let mut template = builder.build()?;
2327
2328 template.fix_mate_info()?;
2329
2330 assert_eq!(
2333 template.records[2].template_length(),
2334 101,
2335 "R1 supplementary TLEN should be recalculated from positions"
2336 );
2337
2338 Ok(())
2339 }
2340
2341 #[test]
2343 fn test_fix_mate_info_no_mate_primary() -> Result<()> {
2344 let r1 = create_mapped_record_with_tlen(b"read1", FLAG_PAIRED | FLAG_READ1, 100, 30, 200);
2346 let r1_supp = create_mapped_record_with_tlen(
2348 b"read1",
2349 FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY,
2350 300,
2351 20,
2352 777, );
2354
2355 let mut builder = Builder::new();
2356 builder.push(r1)?;
2357 builder.push(r1_supp)?;
2358 let mut template = builder.build()?;
2359
2360 template.fix_mate_info()?;
2361
2362 assert_eq!(
2364 template.records[1].template_length(),
2365 777,
2366 "R1 supplementary TLEN should be unchanged without R2 primary"
2367 );
2368
2369 Ok(())
2370 }
2371
2372 #[test]
2375 fn test_template_record_ordering() -> Result<()> {
2376 let r1 = create_test_record(b"read1", FLAG_PAIRED | FLAG_READ1);
2377 let r2 = create_test_record(b"read1", FLAG_PAIRED | FLAG_READ2);
2378 let r1_supp1 = create_test_record(b"read1", FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY);
2379 let r1_supp2 = create_test_record(b"read1", FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY);
2380 let r2_supp = create_test_record(b"read1", FLAG_PAIRED | FLAG_READ2 | FLAG_SUPPLEMENTARY);
2381 let r1_sec = create_test_record(b"read1", FLAG_PAIRED | FLAG_READ1 | FLAG_SECONDARY);
2382 let r2_sec1 = create_test_record(b"read1", FLAG_PAIRED | FLAG_READ2 | FLAG_SECONDARY);
2383 let r2_sec2 = create_test_record(b"read1", FLAG_PAIRED | FLAG_READ2 | FLAG_SECONDARY);
2384
2385 let mut builder = Builder::new();
2387 builder.push(r2_sec1)?;
2388 builder.push(r1_supp2)?;
2389 builder.push(r1)?;
2390 builder.push(r2_supp)?;
2391 builder.push(r1_sec)?;
2392 builder.push(r2)?;
2393 builder.push(r2_sec2)?;
2394 builder.push(r1_supp1)?;
2395 let template = builder.build()?;
2396
2397 assert_eq!(template.r1, Some((0, 1)), "R1 should be at index 0");
2399 assert_eq!(template.r2, Some((1, 2)), "R2 should be at index 1");
2400 assert_eq!(template.r1_supplementals, Some((2, 4)), "R1 supps should be at indices 2-3");
2401 assert_eq!(template.r2_supplementals, Some((4, 5)), "R2 supps should be at index 4");
2402 assert_eq!(template.r1_secondaries, Some((5, 6)), "R1 secs should be at index 5");
2403 assert_eq!(template.r2_secondaries, Some((6, 8)), "R2 secs should be at indices 6-7");
2404
2405 let all_reads: Vec<_> = template.all_reads().collect();
2407 assert_eq!(all_reads.len(), 8);
2408
2409 assert!(
2411 all_reads[0].flags().is_first_segment() && !all_reads[0].flags().is_supplementary()
2412 );
2413 assert!(all_reads[1].flags().is_last_segment() && !all_reads[1].flags().is_supplementary());
2414 assert!(all_reads[2].flags().is_first_segment() && all_reads[2].flags().is_supplementary());
2415 assert!(all_reads[3].flags().is_first_segment() && all_reads[3].flags().is_supplementary());
2416 assert!(all_reads[4].flags().is_last_segment() && all_reads[4].flags().is_supplementary());
2417 assert!(all_reads[5].flags().is_first_segment() && all_reads[5].flags().is_secondary());
2418 assert!(all_reads[6].flags().is_last_segment() && all_reads[6].flags().is_secondary());
2419 assert!(all_reads[7].flags().is_last_segment() && all_reads[7].flags().is_secondary());
2420
2421 Ok(())
2422 }
2423
2424 #[test]
2427 fn test_record_ordering_within_groups_is_reversed() -> Result<()> {
2428 let r1 = create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ1, 100, 30);
2430 let r2 = create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ2, 200, 30);
2431
2432 let r1_supp1 =
2434 create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY, 1000, 20);
2435 let r1_supp2 =
2436 create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY, 2000, 20);
2437 let r1_supp3 =
2438 create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY, 3000, 20);
2439
2440 let r2_supp1 =
2442 create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ2 | FLAG_SUPPLEMENTARY, 4000, 20);
2443 let r2_supp2 =
2444 create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ2 | FLAG_SUPPLEMENTARY, 5000, 20);
2445
2446 let r1_sec1 =
2448 create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ1 | FLAG_SECONDARY, 6000, 10);
2449 let r1_sec2 =
2450 create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ1 | FLAG_SECONDARY, 7000, 10);
2451
2452 let mut builder = Builder::new();
2454 builder.push(r1)?;
2455 builder.push(r2)?;
2456 builder.push(r1_supp1)?;
2457 builder.push(r1_supp2)?;
2458 builder.push(r1_supp3)?;
2459 builder.push(r2_supp1)?;
2460 builder.push(r2_supp2)?;
2461 builder.push(r1_sec1)?;
2462 builder.push(r1_sec2)?;
2463
2464 let template = builder.build()?;
2465
2466 let get_pos = |rec: &RecordBuf| -> usize { rec.alignment_start().map_or(0, |p| p.get()) };
2468
2469 assert_eq!(get_pos(&template.records[0]), 100, "R1 primary");
2471 assert_eq!(get_pos(&template.records[1]), 200, "R2 primary");
2472
2473 assert_eq!(get_pos(&template.records[2]), 3000, "R1 supp should be in reverse order");
2475 assert_eq!(get_pos(&template.records[3]), 2000, "R1 supp should be in reverse order");
2476 assert_eq!(get_pos(&template.records[4]), 1000, "R1 supp should be in reverse order");
2477
2478 assert_eq!(get_pos(&template.records[5]), 5000, "R2 supp should be in reverse order");
2480 assert_eq!(get_pos(&template.records[6]), 4000, "R2 supp should be in reverse order");
2481
2482 assert_eq!(get_pos(&template.records[7]), 7000, "R1 sec should be in reverse order");
2484 assert_eq!(get_pos(&template.records[8]), 6000, "R1 sec should be in reverse order");
2485
2486 Ok(())
2487 }
2488
2489 fn create_mapped_record_with_flags(
2491 name: &[u8],
2492 flags: u16,
2493 pos: usize,
2494 mapq: u8,
2495 tlen: i32,
2496 mate_ref_id: Option<usize>,
2497 mate_pos: Option<usize>,
2498 ) -> RecordBuf {
2499 let is_read1 = (flags & FLAG_READ1) != 0;
2500 let is_paired = (flags & FLAG_PAIRED) != 0;
2501
2502 let mut builder = RecordBuilder::new()
2503 .name(std::str::from_utf8(name).unwrap())
2504 .sequence(&"A".repeat(100))
2505 .qualities(&[30u8; 100])
2506 .cigar("100M")
2507 .reference_sequence_id(0)
2508 .alignment_start(pos)
2509 .mapping_quality(mapq)
2510 .template_length(tlen);
2511
2512 if is_paired {
2513 builder = builder.first_segment(is_read1);
2514 }
2515
2516 if let Some(mate_ref) = mate_ref_id {
2517 builder = builder.mate_reference_sequence_id(mate_ref);
2518 }
2519 if let Some(mate_p) = mate_pos {
2520 builder = builder.mate_alignment_start(mate_p);
2521 }
2522
2523 let mut record = builder.build();
2525 *record.flags_mut() = Flags::from(flags);
2526
2527 record
2528 }
2529
2530 const FLAG_REVERSE: u16 = 0x10;
2532 const FLAG_MATE_REVERSE: u16 = 0x20;
2533 const FLAG_UNMAPPED: u16 = 0x4;
2534 const FLAG_MATE_UNMAPPED: u16 = 0x8;
2535
2536 #[test]
2539 fn test_fix_mate_info_sets_full_mate_info_on_r1_supplementals() -> Result<()> {
2540 let mq_tag = Tag::new(b'M', b'Q');
2541 let mc_tag = Tag::new(b'M', b'C');
2542 let as_tag = Tag::new(b'A', b'S');
2543 let ms_tag = Tag::new(b'm', b's');
2544
2545 let mut r1 = create_mapped_record_with_flags(
2547 b"read1",
2548 FLAG_PAIRED | FLAG_READ1,
2549 100,
2550 30,
2551 200,
2552 Some(0),
2553 Some(200),
2554 );
2555 r1.data_mut().insert(as_tag, BufValue::Int32(100));
2556
2557 let mut r2 = create_mapped_record_with_flags(
2559 b"read1",
2560 FLAG_PAIRED | FLAG_READ2 | FLAG_REVERSE,
2561 200,
2562 40,
2563 -200,
2564 Some(0),
2565 Some(100),
2566 );
2567 r2.data_mut().insert(as_tag, BufValue::Int32(150));
2568
2569 let r1_supp = create_mapped_record_with_flags(
2571 b"read1",
2572 FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY | FLAG_MATE_REVERSE, 500,
2574 25,
2575 0, Some(0),
2577 Some(500), );
2579
2580 let mut builder = Builder::new();
2581 builder.push(r1)?;
2582 builder.push(r2)?;
2583 builder.push(r1_supp)?;
2584 let mut template = builder.build()?;
2585
2586 template.fix_mate_info()?;
2587
2588 let supp = &template.records[2];
2590
2591 assert_eq!(
2593 supp.mate_alignment_start().map(|p| p.get()),
2594 Some(200),
2595 "R1 supp should have mate pos from R2"
2596 );
2597
2598 assert_eq!(
2600 supp.mate_reference_sequence_id(),
2601 Some(0),
2602 "R1 supp should have mate ref from R2"
2603 );
2604
2605 assert!(
2607 supp.flags().is_mate_reverse_complemented(),
2608 "R1 supp should have mate_reverse set since R2 is reverse"
2609 );
2610
2611 assert!(
2613 !supp.flags().is_mate_unmapped(),
2614 "R1 supp should NOT have mate_unmapped since R2 is mapped"
2615 );
2616
2617 assert_eq!(supp.template_length(), 200, "R1 supp TLEN should be -(-200) = 200");
2619
2620 let mq_value = supp.data().get(&mq_tag).and_then(extract_int_value);
2622 assert_eq!(mq_value, Some(40), "R1 supp MQ should be R2's mapq");
2623
2624 let mc_value = supp.data().get(&mc_tag);
2626 assert!(mc_value.is_some(), "R1 supp should have MC tag");
2627
2628 let ms_value = supp.data().get(&ms_tag).and_then(extract_int_value);
2630 assert_eq!(ms_value, Some(150), "R1 supp ms should be R2's AS value");
2631
2632 Ok(())
2633 }
2634
2635 #[test]
2638 fn test_fix_mate_info_sets_full_mate_info_on_r2_supplementals() -> Result<()> {
2639 let mq_tag = Tag::new(b'M', b'Q');
2640 let mc_tag = Tag::new(b'M', b'C');
2641 let as_tag = Tag::new(b'A', b'S');
2642 let ms_tag = Tag::new(b'm', b's');
2643
2644 let mut r1 = create_mapped_record_with_flags(
2646 b"read1",
2647 FLAG_PAIRED | FLAG_READ1,
2648 100,
2649 35,
2650 300,
2651 Some(0),
2652 Some(200),
2653 );
2654 r1.data_mut().insert(as_tag, BufValue::Int32(120));
2655
2656 let mut r2 = create_mapped_record_with_flags(
2658 b"read1",
2659 FLAG_PAIRED | FLAG_READ2,
2660 200,
2661 45,
2662 -300,
2663 Some(0),
2664 Some(100),
2665 );
2666 r2.data_mut().insert(as_tag, BufValue::Int32(180));
2667
2668 let r2_supp = create_mapped_record_with_flags(
2670 b"read1",
2671 FLAG_PAIRED | FLAG_READ2 | FLAG_SUPPLEMENTARY,
2672 600,
2673 20,
2674 0, Some(0),
2676 Some(600), );
2678
2679 let mut builder = Builder::new();
2680 builder.push(r1)?;
2681 builder.push(r2)?;
2682 builder.push(r2_supp)?;
2683 let mut template = builder.build()?;
2684
2685 template.fix_mate_info()?;
2686
2687 let supp = &template.records[2];
2689
2690 assert_eq!(
2692 supp.mate_alignment_start().map(|p| p.get()),
2693 Some(100),
2694 "R2 supp should have mate pos from R1"
2695 );
2696
2697 assert!(
2699 !supp.flags().is_mate_reverse_complemented(),
2700 "R2 supp should NOT have mate_reverse since R1 is forward"
2701 );
2702
2703 assert_eq!(supp.template_length(), -101, "R2 supp TLEN should be -(101) = -101");
2706
2707 let mq_value = supp.data().get(&mq_tag).and_then(extract_int_value);
2709 assert_eq!(mq_value, Some(35), "R2 supp MQ should be R1's mapq");
2710
2711 assert!(supp.data().get(&mc_tag).is_some(), "R2 supp should have MC tag");
2713
2714 let ms_value = supp.data().get(&ms_tag).and_then(extract_int_value);
2716 assert_eq!(ms_value, Some(120), "R2 supp ms should be R1's AS value");
2717
2718 Ok(())
2719 }
2720
2721 #[test]
2724 fn test_fix_mate_info_supplemental_with_unmapped_mate() -> Result<()> {
2725 let mc_tag = Tag::new(b'M', b'C');
2726
2727 let r1 = create_mapped_record_with_flags(
2729 b"read1",
2730 FLAG_PAIRED | FLAG_READ1 | FLAG_MATE_UNMAPPED, 100,
2732 30,
2733 0, Some(0),
2735 Some(100), );
2737
2738 let mut r2 = create_mapped_record_with_flags(
2740 b"read1",
2741 FLAG_PAIRED | FLAG_READ2 | FLAG_UNMAPPED,
2742 100, 0, 0, Some(0),
2746 Some(100),
2747 );
2748 *r2.cigar_mut() = CigarBuf::default();
2750
2751 let r1_supp = create_mapped_record_with_flags(
2753 b"read1",
2754 FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY | FLAG_MATE_REVERSE, 500,
2756 25,
2757 999, Some(0),
2759 Some(500), );
2761
2762 let mut builder = Builder::new();
2763 builder.push(r1)?;
2764 builder.push(r2)?;
2765 builder.push(r1_supp)?;
2766 let mut template = builder.build()?;
2767
2768 template.fix_mate_info()?;
2769
2770 let supp = &template.records[2];
2772
2773 assert!(
2775 supp.flags().is_mate_unmapped(),
2776 "R1 supp should have mate_unmapped since R2 is unmapped"
2777 );
2778
2779 assert!(
2781 !supp.flags().is_mate_reverse_complemented(),
2782 "R1 supp should NOT have mate_reverse when R2 is unmapped (unmapped reads have no orientation)"
2783 );
2784
2785 assert!(
2787 supp.data().get(&mc_tag).is_none(),
2788 "R1 supp should NOT have MC tag when mate is unmapped"
2789 );
2790
2791 assert_eq!(supp.template_length(), 0, "TLEN should be 0 when mate is unmapped");
2793
2794 Ok(())
2795 }
2796
2797 #[test]
2800 fn test_fix_mate_info_clears_incorrect_mate_reverse_flag() -> Result<()> {
2801 let r1 = create_mapped_record_with_flags(
2803 b"read1",
2804 FLAG_PAIRED | FLAG_READ1,
2805 100,
2806 30,
2807 200,
2808 Some(0),
2809 Some(200),
2810 );
2811
2812 let r2 = create_mapped_record_with_flags(
2814 b"read1",
2815 FLAG_PAIRED | FLAG_READ2, 200,
2817 40,
2818 -200,
2819 Some(0),
2820 Some(100),
2821 );
2822
2823 let r1_supp = create_mapped_record_with_flags(
2825 b"read1",
2826 FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY | FLAG_MATE_REVERSE, 500,
2828 25,
2829 0,
2830 Some(0),
2831 Some(500),
2832 );
2833
2834 let mut builder = Builder::new();
2835 builder.push(r1)?;
2836 builder.push(r2)?;
2837 builder.push(r1_supp)?;
2838 let mut template = builder.build()?;
2839
2840 assert!(
2842 template.records[2].flags().is_mate_reverse_complemented(),
2843 "Before fix: supp incorrectly has mate_reverse"
2844 );
2845
2846 template.fix_mate_info()?;
2847
2848 assert!(
2850 !template.records[2].flags().is_mate_reverse_complemented(),
2851 "After fix: supp should NOT have mate_reverse since R2 is forward"
2852 );
2853
2854 Ok(())
2855 }
2856
2857 #[test]
2859 fn test_fix_mate_info_multiple_supplementals() -> Result<()> {
2860 let as_tag = Tag::new(b'A', b'S');
2861
2862 let r1 = create_mapped_record_with_flags(
2864 b"read1",
2865 FLAG_PAIRED | FLAG_READ1,
2866 100,
2867 30,
2868 400,
2869 Some(0),
2870 Some(300),
2871 );
2872
2873 let mut r2 = create_mapped_record_with_flags(
2875 b"read1",
2876 FLAG_PAIRED | FLAG_READ2 | FLAG_REVERSE,
2877 300,
2878 50,
2879 -400,
2880 Some(0),
2881 Some(100),
2882 );
2883 r2.data_mut().insert(as_tag, BufValue::Int32(200));
2884
2885 let r1_supp1 = create_mapped_record_with_flags(
2887 b"read1",
2888 FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY,
2889 500,
2890 20,
2891 0,
2892 Some(0),
2893 Some(500),
2894 );
2895 let r1_supp2 = create_mapped_record_with_flags(
2896 b"read1",
2897 FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY,
2898 700,
2899 15,
2900 0,
2901 Some(0),
2902 Some(700),
2903 );
2904
2905 let mut builder = Builder::new();
2906 builder.push(r1)?;
2907 builder.push(r2)?;
2908 builder.push(r1_supp1)?;
2909 builder.push(r1_supp2)?;
2910 let mut template = builder.build()?;
2911
2912 template.fix_mate_info()?;
2913
2914 for i in 2..=3 {
2918 let supp = &template.records[i];
2919
2920 assert_eq!(
2921 supp.mate_alignment_start().map(|p| p.get()),
2922 Some(300),
2923 "Supp {i} should have mate pos 300"
2924 );
2925 assert!(
2926 supp.flags().is_mate_reverse_complemented(),
2927 "Supp {i} should have mate_reverse"
2928 );
2929 assert_eq!(supp.template_length(), 300, "Supp {i} TLEN should be 300");
2930 }
2931
2932 Ok(())
2933 }
2934
2935 fn create_insert_size_record(
2941 name: &[u8],
2942 flags: u16,
2943 pos: usize,
2944 read_length: usize,
2945 ref_id: Option<usize>,
2946 ) -> RecordBuf {
2947 let is_read1 = (flags & FLAG_READ1) != 0;
2948 let is_paired = (flags & FLAG_PAIRED) != 0;
2949
2950 let mut builder = RecordBuilder::new()
2951 .name(std::str::from_utf8(name).unwrap())
2952 .sequence(&"A".repeat(read_length))
2953 .qualities(&vec![30u8; read_length])
2954 .cigar(&format!("{read_length}M"))
2955 .alignment_start(pos)
2956 .mapping_quality(30);
2957
2958 if let Some(rid) = ref_id {
2959 builder = builder.reference_sequence_id(rid);
2960 }
2961
2962 if is_paired {
2963 builder = builder.first_segment(is_read1);
2964 }
2965
2966 let mut record = builder.build();
2968 *record.flags_mut() = Flags::from(flags);
2969
2970 record
2971 }
2972
2973 #[test]
2979 fn test_compute_insert_size_normal_innie() {
2980 let r1 = create_insert_size_record(b"read1", FLAG_PAIRED | FLAG_READ1, 1, 100, Some(0));
2981 let r2 = create_insert_size_record(
2982 b"read1",
2983 FLAG_PAIRED | FLAG_READ2 | FLAG_REVERSE,
2984 500,
2985 100,
2986 Some(0),
2987 );
2988
2989 let insert_size = compute_insert_size(&r1, &r2);
2990 assert_eq!(insert_size, 599);
2994 }
2995
2996 #[test]
3000 fn test_compute_insert_size_overlapping_innie() {
3001 let r1 = create_insert_size_record(b"read1", FLAG_PAIRED | FLAG_READ1, 1, 100, Some(0));
3002 let r2 = create_insert_size_record(
3003 b"read1",
3004 FLAG_PAIRED | FLAG_READ2 | FLAG_REVERSE,
3005 50,
3006 100,
3007 Some(0),
3008 );
3009
3010 let insert_size = compute_insert_size(&r1, &r2);
3011 assert_eq!(insert_size, 149);
3014 }
3015
3016 #[test]
3020 fn test_compute_insert_size_completely_overlapping_innie() {
3021 let r1 = create_insert_size_record(b"read1", FLAG_PAIRED | FLAG_READ1, 1, 100, Some(0));
3022 let r2 = create_insert_size_record(
3023 b"read1",
3024 FLAG_PAIRED | FLAG_READ2 | FLAG_REVERSE,
3025 1,
3026 100,
3027 Some(0),
3028 );
3029
3030 let insert_size = compute_insert_size(&r1, &r2);
3031 assert_eq!(insert_size, 100);
3034 }
3035
3036 #[test]
3040 fn test_compute_insert_size_normal_outie() {
3041 let r1 = create_insert_size_record(
3042 b"read1",
3043 FLAG_PAIRED | FLAG_READ1 | FLAG_REVERSE,
3044 1,
3045 100,
3046 Some(0),
3047 );
3048 let r2 = create_insert_size_record(b"read1", FLAG_PAIRED | FLAG_READ2, 500, 100, Some(0));
3049
3050 let insert_size = compute_insert_size(&r1, &r2);
3051 assert_eq!(insert_size, 401);
3054 }
3055
3056 #[test]
3060 fn test_compute_insert_size_forward_tandem() {
3061 let r1 = create_insert_size_record(
3062 b"read1",
3063 FLAG_PAIRED | FLAG_READ1 | FLAG_REVERSE,
3064 1,
3065 100,
3066 Some(0),
3067 );
3068 let r2 = create_insert_size_record(
3069 b"read1",
3070 FLAG_PAIRED | FLAG_READ2 | FLAG_REVERSE,
3071 500,
3072 100,
3073 Some(0),
3074 );
3075
3076 let insert_size = compute_insert_size(&r1, &r2);
3077 assert_eq!(insert_size, 500);
3080 }
3081
3082 #[test]
3086 fn test_compute_insert_size_reverse_tandem() {
3087 let r1 = create_insert_size_record(b"read1", FLAG_PAIRED | FLAG_READ1, 1, 100, Some(0));
3088 let r2 = create_insert_size_record(b"read1", FLAG_PAIRED | FLAG_READ2, 500, 100, Some(0));
3089
3090 let insert_size = compute_insert_size(&r1, &r2);
3091 assert_eq!(insert_size, 500);
3094 }
3095
3096 #[test]
3100 fn test_compute_insert_size_negative() {
3101 let r1 = create_insert_size_record(b"read1", FLAG_PAIRED | FLAG_READ1, 500, 100, Some(0));
3102 let r2 = create_insert_size_record(
3103 b"read1",
3104 FLAG_PAIRED | FLAG_READ2 | FLAG_REVERSE,
3105 1,
3106 100,
3107 Some(0),
3108 );
3109
3110 let insert_size = compute_insert_size(&r1, &r2);
3111 assert_eq!(insert_size, -401);
3115 }
3116
3117 #[test]
3119 fn test_compute_insert_size_different_references() {
3120 let r1 = create_insert_size_record(b"read1", FLAG_PAIRED | FLAG_READ1, 100, 100, Some(0));
3121 let r2 = create_insert_size_record(
3122 b"read1",
3123 FLAG_PAIRED | FLAG_READ2 | FLAG_REVERSE,
3124 200,
3125 100,
3126 Some(1), );
3128
3129 let insert_size = compute_insert_size(&r1, &r2);
3130 assert_eq!(insert_size, 0, "Insert size should be 0 for different references");
3131 }
3132
3133 #[test]
3135 fn test_compute_insert_size_r1_unmapped() {
3136 let r1 = create_insert_size_record(
3137 b"read1",
3138 FLAG_PAIRED | FLAG_READ1 | FLAG_UNMAPPED,
3139 100,
3140 100,
3141 Some(0),
3142 );
3143 let r2 = create_insert_size_record(
3144 b"read1",
3145 FLAG_PAIRED | FLAG_READ2 | FLAG_REVERSE,
3146 200,
3147 100,
3148 Some(0),
3149 );
3150
3151 let insert_size = compute_insert_size(&r1, &r2);
3152 assert_eq!(insert_size, 0, "Insert size should be 0 when R1 is unmapped");
3153 }
3154
3155 #[test]
3157 fn test_compute_insert_size_r2_unmapped() {
3158 let r1 = create_insert_size_record(b"read1", FLAG_PAIRED | FLAG_READ1, 100, 100, Some(0));
3159 let r2 = create_insert_size_record(
3160 b"read1",
3161 FLAG_PAIRED | FLAG_READ2 | FLAG_UNMAPPED,
3162 200,
3163 100,
3164 Some(0),
3165 );
3166
3167 let insert_size = compute_insert_size(&r1, &r2);
3168 assert_eq!(insert_size, 0, "Insert size should be 0 when R2 is unmapped");
3169 }
3170
3171 #[test]
3173 fn test_compute_insert_size_both_unmapped() {
3174 let r1 = create_insert_size_record(
3175 b"read1",
3176 FLAG_PAIRED | FLAG_READ1 | FLAG_UNMAPPED,
3177 100,
3178 100,
3179 Some(0),
3180 );
3181 let r2 = create_insert_size_record(
3182 b"read1",
3183 FLAG_PAIRED | FLAG_READ2 | FLAG_UNMAPPED,
3184 200,
3185 100,
3186 Some(0),
3187 );
3188
3189 let insert_size = compute_insert_size(&r1, &r2);
3190 assert_eq!(insert_size, 0, "Insert size should be 0 when both reads are unmapped");
3191 }
3192
3193 #[test]
3195 fn test_compute_insert_size_with_indels() {
3196 let r1 = RecordBuilder::new()
3198 .name("read1")
3199 .sequence(&"A".repeat(100))
3200 .qualities(&[30u8; 100])
3201 .reference_sequence_id(0)
3202 .alignment_start(1)
3203 .cigar("50M10I40M")
3204 .first_segment(true)
3205 .build();
3206
3207 let mut r2 = RecordBuilder::new()
3209 .name("read1")
3210 .sequence(&"A".repeat(100))
3211 .qualities(&[30u8; 100])
3212 .reference_sequence_id(0)
3213 .alignment_start(200)
3214 .cigar("50M5D50M")
3215 .first_segment(false)
3216 .reverse_complement(true)
3217 .build();
3218 *r2.flags_mut() = Flags::from(FLAG_PAIRED | FLAG_READ2 | FLAG_REVERSE);
3220
3221 let insert_size = compute_insert_size(&r1, &r2);
3222 assert_eq!(insert_size, 304);
3226 }
3227
3228 #[test]
3234 fn test_fix_mate_info_sets_tlen_normal_innie() -> Result<()> {
3235 let r1 = create_insert_size_record(b"read1", FLAG_PAIRED | FLAG_READ1, 1, 100, Some(0));
3236 let r2 = create_insert_size_record(
3237 b"read1",
3238 FLAG_PAIRED | FLAG_READ2 | FLAG_REVERSE,
3239 500,
3240 100,
3241 Some(0),
3242 );
3243
3244 let mut builder = Builder::new();
3245 builder.push(r1)?;
3246 builder.push(r2)?;
3247 let mut template = builder.build()?;
3248
3249 template.fix_mate_info()?;
3250
3251 assert_eq!(template.records[0].template_length(), 599, "R1 TLEN");
3253 assert_eq!(template.records[1].template_length(), -599, "R2 TLEN");
3254
3255 Ok(())
3256 }
3257
3258 #[test]
3260 fn test_fix_mate_info_sets_mate_cigar() -> Result<()> {
3261 let mc_tag = Tag::new(b'M', b'C');
3262
3263 let r1 = RecordBuilder::new()
3264 .name("read1")
3265 .sequence(&"A".repeat(50))
3266 .qualities(&[30u8; 50])
3267 .reference_sequence_id(0)
3268 .alignment_start(1)
3269 .cigar("50M")
3270 .first_segment(true)
3271 .build();
3272
3273 let mut r2 = RecordBuilder::new()
3275 .name("read1")
3276 .sequence(&"A".repeat(50))
3277 .qualities(&[30u8; 50])
3278 .reference_sequence_id(0)
3279 .alignment_start(500)
3280 .cigar("25M5I20M")
3281 .first_segment(false)
3282 .reverse_complement(true)
3283 .build();
3284 *r2.flags_mut() = Flags::from(FLAG_PAIRED | FLAG_READ2 | FLAG_REVERSE);
3286
3287 let mut builder = Builder::new();
3288 builder.push(r1)?;
3289 builder.push(r2)?;
3290 let mut template = builder.build()?;
3291
3292 template.fix_mate_info()?;
3293
3294 let r1_mc = template.records[0].data().get(&mc_tag);
3296 assert!(r1_mc.is_some(), "R1 should have MC tag");
3297 if let Some(BufValue::String(mc)) = r1_mc {
3298 assert_eq!(&mc[..], b"25M5I20M", "R1 MC should be R2's CIGAR");
3299 } else {
3300 panic!("MC tag should be a string");
3301 }
3302
3303 let r2_mc = template.records[1].data().get(&mc_tag);
3305 assert!(r2_mc.is_some(), "R2 should have MC tag");
3306 if let Some(BufValue::String(mc)) = r2_mc {
3307 assert_eq!(&mc[..], b"50M", "R2 MC should be R1's CIGAR");
3308 } else {
3309 panic!("MC tag should be a string");
3310 }
3311
3312 Ok(())
3313 }
3314
3315 #[test]
3317 fn test_fix_mate_info_both_unmapped() -> Result<()> {
3318 let mq_tag = Tag::new(b'M', b'Q');
3319 let mc_tag = Tag::new(b'M', b'C');
3320
3321 let mut r1 = create_test_record(b"read1", FLAG_PAIRED | FLAG_READ1 | FLAG_UNMAPPED);
3322 *r1.reference_sequence_id_mut() = Some(0); *r1.data_mut() = [(mq_tag, BufValue::Int32(30))].into_iter().collect(); let mut r2 = create_test_record(b"read1", FLAG_PAIRED | FLAG_READ2 | FLAG_UNMAPPED);
3326 *r2.reference_sequence_id_mut() = Some(0);
3327 *r2.data_mut() = [(mc_tag, BufValue::String("100M".into()))].into_iter().collect();
3328
3329 let mut builder = Builder::new();
3330 builder.push(r1)?;
3331 builder.push(r2)?;
3332 let mut template = builder.build()?;
3333
3334 template.fix_mate_info()?;
3335
3336 assert!(template.records[0].reference_sequence_id().is_none(), "R1 ref should be cleared");
3338 assert!(template.records[1].reference_sequence_id().is_none(), "R2 ref should be cleared");
3339
3340 assert!(template.records[0].flags().is_mate_unmapped(), "R1 mate_unmapped");
3342 assert!(template.records[1].flags().is_mate_unmapped(), "R2 mate_unmapped");
3343
3344 assert!(template.records[0].data().get(&mq_tag).is_none(), "R1 MQ should be removed");
3346 assert!(template.records[1].data().get(&mc_tag).is_none(), "R2 MC should be removed");
3347
3348 assert_eq!(template.records[0].template_length(), 0, "R1 TLEN should be 0");
3350 assert_eq!(template.records[1].template_length(), 0, "R2 TLEN should be 0");
3351
3352 Ok(())
3353 }
3354
3355 #[test]
3357 fn test_fix_mate_info_one_unmapped() -> Result<()> {
3358 let mq_tag = Tag::new(b'M', b'Q');
3359 let mc_tag = Tag::new(b'M', b'C');
3360
3361 let r1 = create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ1, 100, 30);
3363
3364 let r2 = create_test_record(b"read1", FLAG_PAIRED | FLAG_READ2 | FLAG_UNMAPPED);
3366
3367 let mut builder = Builder::new();
3368 builder.push(r1)?;
3369 builder.push(r2)?;
3370 let mut template = builder.build()?;
3371
3372 template.fix_mate_info()?;
3373
3374 assert_eq!(
3376 template.records[1].reference_sequence_id(),
3377 Some(0),
3378 "R2 should be placed at R1's reference"
3379 );
3380 assert_eq!(
3381 template.records[1].alignment_start().map(|p| p.get()),
3382 Some(100),
3383 "R2 should be placed at R1's position"
3384 );
3385
3386 assert!(
3388 template.records[0].data().get(&mq_tag).is_none(),
3389 "R1 should NOT have MQ when mate is unmapped"
3390 );
3391
3392 assert!(
3394 template.records[1].data().get(&mq_tag).is_some(),
3395 "R2 should have MQ when mate is mapped"
3396 );
3397
3398 assert!(template.records[0].flags().is_mate_unmapped(), "R1 mate_unmapped should be set");
3400
3401 assert!(
3403 !template.records[1].flags().is_mate_unmapped(),
3404 "R2 mate_unmapped should NOT be set"
3405 );
3406
3407 assert_eq!(template.records[0].template_length(), 0, "R1 TLEN");
3409 assert_eq!(template.records[1].template_length(), 0, "R2 TLEN");
3410
3411 assert!(
3413 template.records[0].data().get(&mc_tag).is_none(),
3414 "R1 should NOT have MC when mate is unmapped"
3415 );
3416 assert!(
3417 template.records[1].data().get(&mc_tag).is_some(),
3418 "R2 should have MC when mate is mapped"
3419 );
3420
3421 Ok(())
3422 }
3423
3424 #[test]
3429 fn test_pair_orientation_fr_pair() -> Result<()> {
3430 let r1 = create_mapped_record_with_flags(
3434 b"read1",
3435 FLAG_PAIRED | FLAG_READ1 | FLAG_MATE_REVERSE,
3436 100,
3437 30,
3438 200, Some(0),
3440 Some(200),
3441 );
3442 let r2 = create_mapped_record_with_flags(
3443 b"read1",
3444 FLAG_PAIRED | FLAG_READ2 | FLAG_REVERSE,
3445 200,
3446 30,
3447 -200,
3448 Some(0),
3449 Some(100),
3450 );
3451
3452 let template = Template::from_records(vec![r1, r2])?;
3453 assert_eq!(template.pair_orientation(), Some(PairOrientation::FR));
3454 Ok(())
3455 }
3456
3457 #[test]
3460 fn test_pair_orientation_rf_pair() -> Result<()> {
3461 let r1 = create_mapped_record_with_flags(
3465 b"read1",
3466 FLAG_PAIRED | FLAG_READ1 | FLAG_MATE_REVERSE,
3467 300,
3468 30,
3469 -200, Some(0),
3471 Some(100),
3472 );
3473 let r2 = create_mapped_record_with_flags(
3474 b"read1",
3475 FLAG_PAIRED | FLAG_READ2 | FLAG_REVERSE,
3476 100,
3477 30,
3478 200,
3479 Some(0),
3480 Some(300),
3481 );
3482
3483 let template = Template::from_records(vec![r1, r2])?;
3484 assert_eq!(template.pair_orientation(), Some(PairOrientation::RF));
3485 Ok(())
3486 }
3487
3488 #[test]
3490 fn test_pair_orientation_tandem_both_forward() -> Result<()> {
3491 let r1 = create_mapped_record_with_flags(
3493 b"read1",
3494 FLAG_PAIRED | FLAG_READ1, 100,
3496 30,
3497 100,
3498 Some(0),
3499 Some(200),
3500 );
3501 let r2 = create_mapped_record_with_flags(
3502 b"read1",
3503 FLAG_PAIRED | FLAG_READ2, 200,
3505 30,
3506 -100,
3507 Some(0),
3508 Some(100),
3509 );
3510
3511 let template = Template::from_records(vec![r1, r2])?;
3512 assert_eq!(template.pair_orientation(), Some(PairOrientation::Tandem));
3513 Ok(())
3514 }
3515
3516 #[test]
3518 fn test_pair_orientation_tandem_both_reverse() -> Result<()> {
3519 let r1 = create_mapped_record_with_flags(
3521 b"read1",
3522 FLAG_PAIRED | FLAG_READ1 | FLAG_REVERSE | FLAG_MATE_REVERSE,
3523 100,
3524 30,
3525 100,
3526 Some(0),
3527 Some(200),
3528 );
3529 let r2 = create_mapped_record_with_flags(
3530 b"read1",
3531 FLAG_PAIRED | FLAG_READ2 | FLAG_REVERSE | FLAG_MATE_REVERSE,
3532 200,
3533 30,
3534 -100,
3535 Some(0),
3536 Some(100),
3537 );
3538
3539 let template = Template::from_records(vec![r1, r2])?;
3540 assert_eq!(template.pair_orientation(), Some(PairOrientation::Tandem));
3541 Ok(())
3542 }
3543
3544 #[test]
3546 fn test_pair_orientation_no_r1() -> Result<()> {
3547 let r2 = create_mapped_record_with_flags(
3548 b"read1",
3549 FLAG_PAIRED | FLAG_READ2 | FLAG_REVERSE,
3550 100,
3551 30,
3552 0,
3553 Some(0),
3554 Some(100),
3555 );
3556
3557 let template = Template::from_records(vec![r2])?;
3558 assert_eq!(template.pair_orientation(), None);
3559 Ok(())
3560 }
3561
3562 #[test]
3564 fn test_pair_orientation_no_r2() -> Result<()> {
3565 let r1 = create_mapped_record_with_flags(
3566 b"read1",
3567 FLAG_PAIRED | FLAG_READ1,
3568 100,
3569 30,
3570 0,
3571 Some(0),
3572 Some(100),
3573 );
3574
3575 let template = Template::from_records(vec![r1])?;
3576 assert_eq!(template.pair_orientation(), None);
3577 Ok(())
3578 }
3579
3580 #[test]
3582 fn test_pair_orientation_r1_unmapped() -> Result<()> {
3583 let r1 = create_mapped_record_with_flags(
3584 b"read1",
3585 FLAG_PAIRED | FLAG_READ1 | FLAG_UNMAPPED,
3586 100,
3587 30,
3588 0,
3589 Some(0),
3590 Some(100),
3591 );
3592 let r2 = create_mapped_record_with_flags(
3593 b"read1",
3594 FLAG_PAIRED | FLAG_READ2,
3595 100,
3596 30,
3597 0,
3598 Some(0),
3599 Some(100),
3600 );
3601
3602 let template = Template::from_records(vec![r1, r2])?;
3603 assert_eq!(template.pair_orientation(), None);
3604 Ok(())
3605 }
3606
3607 #[test]
3609 fn test_pair_orientation_r2_unmapped() -> Result<()> {
3610 let r1 = create_mapped_record_with_flags(
3611 b"read1",
3612 FLAG_PAIRED | FLAG_READ1,
3613 100,
3614 30,
3615 0,
3616 Some(0),
3617 Some(100),
3618 );
3619 let r2 = create_mapped_record_with_flags(
3620 b"read1",
3621 FLAG_PAIRED | FLAG_READ2 | FLAG_UNMAPPED,
3622 100,
3623 30,
3624 0,
3625 Some(0),
3626 Some(100),
3627 );
3628
3629 let template = Template::from_records(vec![r1, r2])?;
3630 assert_eq!(template.pair_orientation(), None);
3631 Ok(())
3632 }
3633
3634 #[test]
3636 fn test_pair_orientation_different_chromosomes() -> Result<()> {
3637 let mut r1 = create_mapped_record_with_flags(
3638 b"read1",
3639 FLAG_PAIRED | FLAG_READ1 | FLAG_MATE_REVERSE,
3640 100,
3641 30,
3642 200,
3643 Some(1), Some(200),
3645 );
3646 *r1.reference_sequence_id_mut() = Some(0);
3648
3649 let mut r2 = create_mapped_record_with_flags(
3650 b"read1",
3651 FLAG_PAIRED | FLAG_READ2 | FLAG_REVERSE,
3652 200,
3653 30,
3654 -200,
3655 Some(0),
3656 Some(100),
3657 );
3658 *r2.reference_sequence_id_mut() = Some(1);
3660
3661 let template = Template::from_records(vec![r1, r2])?;
3662 assert_eq!(template.pair_orientation(), None);
3663 Ok(())
3664 }
3665
3666 #[test]
3668 fn test_pair_orientation_fr_from_reverse_read() -> Result<()> {
3669 let r1 = create_mapped_record_with_flags(
3673 b"read1",
3674 FLAG_PAIRED | FLAG_READ1 | FLAG_REVERSE, 200,
3676 30,
3677 -200, Some(0),
3679 Some(100),
3680 );
3681 let r2 = create_mapped_record_with_flags(
3682 b"read1",
3683 FLAG_PAIRED | FLAG_READ2 | FLAG_MATE_REVERSE, 100,
3685 30,
3686 200,
3687 Some(0),
3688 Some(200),
3689 );
3690
3691 let template = Template::from_records(vec![r1, r2])?;
3692 assert_eq!(template.pair_orientation(), Some(PairOrientation::FR));
3693 Ok(())
3694 }
3695
3696 #[allow(clippy::cast_possible_truncation)]
3702 fn make_minimal_raw_bam(name: &[u8], flags: u16) -> Vec<u8> {
3703 let l_read_name = (name.len() + 1) as u8; let total = 32 + l_read_name as usize; let mut buf = vec![0u8; total];
3706
3707 buf[8] = l_read_name;
3708 buf[14..16].copy_from_slice(&flags.to_le_bytes());
3709
3710 let name_start = 32;
3711 buf[name_start..name_start + name.len()].copy_from_slice(name);
3712 buf[name_start + name.len()] = 0; buf
3715 }
3716
3717 #[test]
3718 fn test_from_raw_records_creates_raw_mode_template() {
3719 let raw = make_minimal_raw_bam(b"read1", FLAG_PAIRED | FLAG_READ1);
3720 let template = Template::from_raw_records(vec![raw]).unwrap();
3721
3722 assert!(template.is_raw_byte_mode());
3724 assert!(template.records.is_empty());
3726 assert!(template.raw_r1().is_some());
3728 }
3729
3730 #[test]
3731 fn test_r1_accessor_returns_none_in_raw_mode() {
3732 let raw = make_minimal_raw_bam(b"read1", FLAG_PAIRED | FLAG_READ1);
3734 let template = Template::from_raw_records(vec![raw]).unwrap();
3735
3736 assert!(template.r1().is_none());
3738 assert!(template.raw_r1().is_some());
3740 }
3741
3742 #[test]
3743 fn test_r2_accessor_returns_none_in_raw_mode() {
3744 let r1 = make_minimal_raw_bam(b"read1", FLAG_PAIRED | FLAG_READ1);
3746 let r2 = make_minimal_raw_bam(b"read1", FLAG_PAIRED | FLAG_READ2);
3747 let template = Template::from_raw_records(vec![r1, r2]).unwrap();
3748
3749 assert!(template.is_raw_byte_mode());
3750
3751 assert!(template.r2().is_none());
3753 assert!(template.raw_r2().is_some());
3755 }
3756
3757 #[test]
3762 fn test_from_raw_records_fast_path_normal_order() {
3763 let r1 = make_minimal_raw_bam(b"read1", FLAG_PAIRED | FLAG_READ1);
3765 let r2 = make_minimal_raw_bam(b"read1", FLAG_PAIRED | FLAG_READ2);
3766 let template = Template::from_raw_records(vec![r1, r2]).unwrap();
3767
3768 assert!(template.is_raw_byte_mode());
3769 assert!(template.raw_r1().is_some());
3770 assert!(template.raw_r2().is_some());
3771 let r1_flags = crate::sort::bam_fields::flags(template.raw_r1().unwrap());
3773 assert_ne!(r1_flags & FLAG_READ1, 0);
3774 }
3775
3776 #[test]
3777 fn test_from_raw_records_fast_path_swap() {
3778 let r1 = make_minimal_raw_bam(b"read1", FLAG_PAIRED | FLAG_READ1);
3780 let r2 = make_minimal_raw_bam(b"read1", FLAG_PAIRED | FLAG_READ2);
3781 let template = Template::from_raw_records(vec![r2, r1]).unwrap();
3782
3783 assert!(template.is_raw_byte_mode());
3784 let r1_flags = crate::sort::bam_fields::flags(template.raw_r1().unwrap());
3786 assert_ne!(r1_flags & FLAG_READ1, 0);
3787 let r2_flags = crate::sort::bam_fields::flags(template.raw_r2().unwrap());
3788 assert_ne!(r2_flags & FLAG_READ2, 0);
3789 }
3790
3791 #[test]
3792 fn test_from_raw_records_fast_path_fallthrough_both_r1() {
3793 let r1a = make_minimal_raw_bam(b"read1", FLAG_PAIRED | FLAG_READ1);
3795 let r1b = make_minimal_raw_bam(b"read1", FLAG_PAIRED | FLAG_READ1);
3796 let result = Template::from_raw_records(vec![r1a, r1b]);
3797 assert!(result.is_err());
3798 }
3799
3800 #[test]
3801 fn test_from_raw_records_fast_path_with_secondary() {
3802 let r1 = make_minimal_raw_bam(b"read1", FLAG_PAIRED | FLAG_READ1);
3804 let sec = make_minimal_raw_bam(
3805 b"read1",
3806 FLAG_PAIRED | FLAG_READ1 | crate::sort::bam_fields::flags::SECONDARY,
3807 );
3808 let template = Template::from_raw_records(vec![r1, sec]).unwrap();
3809 assert!(template.is_raw_byte_mode());
3810 assert!(template.raw_r1().is_some());
3811 }
3812
3813 #[test]
3814 fn test_from_raw_records_with_supplementary() {
3815 let r1 = make_minimal_raw_bam(b"read1", FLAG_PAIRED | FLAG_READ1);
3817 let r1_supp = make_minimal_raw_bam(
3818 b"read1",
3819 FLAG_PAIRED | FLAG_READ1 | crate::sort::bam_fields::flags::SUPPLEMENTARY,
3820 );
3821 let r2 = make_minimal_raw_bam(b"read1", FLAG_PAIRED | FLAG_READ2);
3822 let template = Template::from_raw_records(vec![r1, r1_supp, r2]).unwrap();
3823 assert!(template.is_raw_byte_mode());
3824 assert!(template.raw_r1().is_some());
3825 assert!(template.raw_r2().is_some());
3826 }
3827
3828 #[test]
3829 fn test_from_raw_records_single_unpaired() {
3830 let r = make_minimal_raw_bam(b"read1", 0); let template = Template::from_raw_records(vec![r]).unwrap();
3833 assert!(template.is_raw_byte_mode());
3834 assert!(template.raw_r1().is_some());
3835 assert!(template.raw_r2().is_none());
3836 }
3837
3838 #[test]
3839 fn test_from_raw_records_empty() {
3840 let result = Template::from_raw_records(vec![]);
3841 assert!(result.is_err());
3842 }
3843
3844 #[test]
3845 fn test_from_raw_records_name_mismatch() {
3846 let r1 = make_minimal_raw_bam(b"read1", FLAG_PAIRED | FLAG_READ1);
3848 let r1_supp = make_minimal_raw_bam(
3849 b"read1",
3850 FLAG_PAIRED | FLAG_READ1 | crate::sort::bam_fields::flags::SUPPLEMENTARY,
3851 );
3852 let r2_wrong = make_minimal_raw_bam(b"read2", FLAG_PAIRED | FLAG_READ2);
3853 let result = Template::from_raw_records(vec![r1, r1_supp, r2_wrong]);
3854 assert!(result.is_err());
3855 }
3856
3857 #[test]
3858 fn test_from_raw_records_all_raw_records_mut() {
3859 let r1 = make_minimal_raw_bam(b"read1", FLAG_PAIRED | FLAG_READ1);
3860 let r2 = make_minimal_raw_bam(b"read1", FLAG_PAIRED | FLAG_READ2);
3861 let mut template = Template::from_raw_records(vec![r1, r2]).unwrap();
3862 let recs = template.all_raw_records_mut().unwrap();
3864 assert_eq!(recs.len(), 2);
3865 }
3866
3867 #[test]
3868 fn test_from_raw_records_into_raw_records() {
3869 let r1 = make_minimal_raw_bam(b"read1", FLAG_PAIRED | FLAG_READ1);
3870 let r2 = make_minimal_raw_bam(b"read1", FLAG_PAIRED | FLAG_READ2);
3871 let template = Template::from_raw_records(vec![r1, r2]).unwrap();
3872 let recs = template.into_raw_records().unwrap();
3873 assert_eq!(recs.len(), 2);
3874 }
3875
3876 #[test]
3877 fn test_from_raw_records_truncated_header() {
3878 let short = vec![0u8; 20];
3880 let err = Template::from_raw_records(vec![short]).unwrap_err();
3881 assert!(err.to_string().contains("too short"), "Error: {err}");
3882 }
3883
3884 #[test]
3885 fn test_from_raw_records_truncated_read_name() {
3886 let mut buf = vec![0u8; 34]; buf[8] = 10; let err = Template::from_raw_records(vec![buf]).unwrap_err();
3890 assert!(err.to_string().contains("truncated"), "Error: {err}");
3891 }
3892
3893 #[test]
3894 fn test_from_raw_records_valid_l_read_name() {
3895 let rec = make_minimal_raw_bam(b"test", FLAG_PAIRED | FLAG_READ1);
3897 assert!(Template::from_raw_records(vec![rec]).is_ok());
3898 }
3899
3900 #[test]
3905 fn test_write_with_offset_none() {
3906 let mi = MoleculeId::None;
3907 let mut buf = String::new();
3908 let result = mi.write_with_offset(100, &mut buf);
3909 assert!(result.is_empty());
3910 }
3911
3912 #[test]
3913 fn test_write_with_offset_single() {
3914 let mi = MoleculeId::Single(5);
3915 let mut buf = String::new();
3916 let result = mi.write_with_offset(100, &mut buf);
3917 assert_eq!(result, b"105");
3918 }
3919
3920 #[test]
3921 fn test_write_with_offset_paired_a() {
3922 let mi = MoleculeId::PairedA(3);
3923 let mut buf = String::new();
3924 let result = mi.write_with_offset(10, &mut buf);
3925 assert_eq!(result, b"13/A");
3926 }
3927
3928 #[test]
3929 fn test_write_with_offset_paired_b() {
3930 let mi = MoleculeId::PairedB(3);
3931 let mut buf = String::new();
3932 let result = mi.write_with_offset(10, &mut buf);
3933 assert_eq!(result, b"13/B");
3934 }
3935
3936 #[test]
3937 fn test_write_with_offset_reuses_buffer() {
3938 let mut buf = String::new();
3939 let mi1 = MoleculeId::Single(1);
3940 let _ = mi1.write_with_offset(0, &mut buf);
3941 assert_eq!(buf, "1");
3942
3943 let mi2 = MoleculeId::PairedA(99);
3945 let result = mi2.write_with_offset(0, &mut buf);
3946 assert_eq!(result, b"99/A");
3947 assert_eq!(buf, "99/A");
3948 }
3949
3950 #[test]
3951 fn test_write_with_offset_matches_to_string() {
3952 let mut buf = String::new();
3954 for mi in [
3955 MoleculeId::None,
3956 MoleculeId::Single(0),
3957 MoleculeId::Single(42),
3958 MoleculeId::PairedA(7),
3959 MoleculeId::PairedB(7),
3960 ] {
3961 let expected = mi.to_string_with_offset(100);
3962 let result = mi.write_with_offset(100, &mut buf);
3963 assert_eq!(result, expected.as_bytes(), "Mismatch for {mi:?}");
3964 }
3965 }
3966
3967 #[test]
3968 fn test_records_in_range_invalid_start_gt_end() {
3969 let template = Template {
3970 name: b"read1".to_vec(),
3971 records: vec![
3972 create_test_record(b"read1", FLAG_PAIRED | FLAG_READ1),
3973 create_test_record(b"read1", FLAG_PAIRED | FLAG_READ2),
3974 ],
3975 raw_records: None,
3976 r1: Some((0, 1)),
3977 r2: Some((1, 2)),
3978 r1_supplementals: None,
3979 r2_supplementals: None,
3980 r1_secondaries: None,
3981 r2_secondaries: None,
3982 mi: MoleculeId::None,
3983 };
3984
3985 assert!(template.records_in_range(Some((2, 1))).is_empty());
3987 assert_eq!(template.records_in_range(Some((0, 2))).len(), 2);
3989 assert!(template.records_in_range(None).is_empty());
3991 assert!(template.records_in_range(Some((0, 10))).is_empty());
3993 }
3994}