1use noodles::sam::alignment::RecordBuf;
10use noodles::sam::alignment::record::Cigar as CigarTrait;
11use noodles::sam::alignment::record::cigar::op::Kind;
12use noodles::sam::alignment::record::data::field::Tag;
13use noodles::sam::alignment::record_buf::data::field::Value;
14
15#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
22pub enum PairOrientation {
23 FR,
29
30 RF,
36
37 Tandem,
43}
44
45#[must_use]
61pub fn read_pos_at_ref_pos(
62 record: &RecordBuf,
63 ref_pos: usize,
64 return_last_base_if_deleted: bool,
65) -> usize {
66 let Some(alignment_start) = record.alignment_start().map(usize::from) else {
67 return 0;
68 };
69
70 let mut read_pos: usize = 0; let mut ref_cursor = alignment_start; let mut last_aligned_read_pos: usize = 0;
74
75 for op_result in record.cigar().iter() {
76 let Ok(op) = op_result else {
77 continue;
78 };
79 let len = op.len();
80
81 match op.kind() {
82 Kind::Match | Kind::SequenceMatch | Kind::SequenceMismatch => {
84 if ref_pos >= ref_cursor && ref_pos < ref_cursor + len {
86 let offset = ref_pos - ref_cursor;
88 return read_pos + offset + 1; }
90 last_aligned_read_pos = read_pos + len; read_pos += len;
92 ref_cursor += len;
93 }
94 Kind::Insertion | Kind::SoftClip => {
96 read_pos += len;
97 }
98 Kind::Deletion | Kind::Skip => {
100 if ref_pos >= ref_cursor && ref_pos < ref_cursor + len {
102 return if return_last_base_if_deleted && last_aligned_read_pos > 0 {
103 last_aligned_read_pos } else {
105 0
106 };
107 }
108 ref_cursor += len;
109 }
110 Kind::HardClip | Kind::Pad => {}
112 }
113 }
114
115 0
117}
118
119#[must_use]
133pub fn is_fr_pair_from_tags(read: &RecordBuf) -> bool {
134 let flags = read.flags();
135
136 if !flags.is_segmented() {
138 return false;
139 }
140
141 if flags.is_unmapped() || flags.is_mate_unmapped() {
143 return false;
144 }
145
146 if read.reference_sequence_id() != read.mate_reference_sequence_id() {
148 return false;
149 }
150
151 get_pair_orientation(read) == PairOrientation::FR
153}
154
155fn parse_mate_cigar(read: &RecordBuf) -> Option<(Vec<(Kind, usize)>, usize)> {
160 let mc_tag = Tag::from([b'M', b'C']);
161 let mc_value = read.data().get(&mc_tag)?;
162
163 let cigar_str = match mc_value {
164 Value::String(s) => std::str::from_utf8(s.as_ref()).ok()?,
165 _ => return None,
166 };
167
168 let mate_start = usize::from(read.mate_alignment_start()?);
169 let ops = parse_cigar_string(cigar_str);
170 if ops.is_empty() {
171 return None;
172 }
173 Some((ops, mate_start))
174}
175
176#[must_use]
188pub fn mate_unclipped_start(read: &RecordBuf) -> Option<isize> {
189 let (ops, mate_start) = parse_mate_cigar(read)?;
190
191 #[expect(
192 clippy::cast_possible_wrap,
193 reason = "genomic positions fit in isize on all supported platforms (64-bit)"
194 )]
195 let mate_start_signed = mate_start as isize;
196 #[expect(
197 clippy::cast_possible_wrap,
198 reason = "CIGAR clipping lengths are small relative to isize::MAX"
199 )]
200 let leading_clip = leading_clipping(&ops) as isize;
201
202 Some(mate_start_signed - leading_clip)
203}
204
205#[must_use]
211pub fn mate_unclipped_end(read: &RecordBuf) -> Option<usize> {
212 let (ops, mate_start) = parse_mate_cigar(read)?;
213 let ref_len = cigar_reference_length(&ops);
214 let trailing_clip = trailing_clipping(&ops);
215
216 Some(mate_start + ref_len - 1 + trailing_clip)
218}
219
220#[must_use]
227pub fn num_bases_extending_past_mate(read: &RecordBuf) -> usize {
228 if !is_fr_pair_from_tags(read) {
230 return 0;
231 }
232
233 let is_positive_strand = !read.flags().is_reverse_complemented();
234
235 let ops: Vec<(Kind, usize)> = read
237 .cigar()
238 .iter()
239 .filter_map(std::result::Result::ok)
240 .map(|op| (op.kind(), op.len()))
241 .collect();
242
243 let read_length: usize = ops
245 .iter()
246 .filter(|(kind, _)| {
247 matches!(
248 kind,
249 Kind::Match
250 | Kind::SequenceMatch
251 | Kind::SequenceMismatch
252 | Kind::Insertion
253 | Kind::SoftClip
254 )
255 })
256 .map(|(_, len)| len)
257 .sum();
258
259 if is_positive_strand {
260 let Some(read_alignment_end) = alignment_end(read) else {
262 return 0;
263 };
264 let mate_unclipped_end_pos = mate_unclipped_end(read).unwrap_or(usize::MAX);
265
266 if read_alignment_end >= mate_unclipped_end_pos {
267 let pos_at_mate_end = read_pos_at_ref_pos(read, mate_unclipped_end_pos, false);
271 read_length.saturating_sub(pos_at_mate_end)
274 } else {
275 let trailing_soft_clip = trailing_soft_clipping(&ops);
278 let gap = mate_unclipped_end_pos - read_alignment_end;
279 trailing_soft_clip.saturating_sub(gap)
280 }
281 } else {
282 let Some(read_alignment_start) = read.alignment_start().map(usize::from) else {
284 return 0;
285 };
286 #[expect(
287 clippy::cast_possible_wrap,
288 reason = "genomic positions fit in isize on all supported platforms (64-bit)"
289 )]
290 let read_alignment_start_signed = read_alignment_start as isize;
291 let mate_unclipped_start_pos = mate_unclipped_start(read).unwrap_or(0);
292
293 if read_alignment_start_signed <= mate_unclipped_start_pos {
294 let lookup_pos = if mate_unclipped_start_pos < 0 {
300 0
301 } else {
302 #[expect(
303 clippy::cast_sign_loss,
304 reason = "value is verified non-negative by the if-check above"
305 )]
306 let pos = mate_unclipped_start_pos as usize;
307 pos
308 };
309 let pos_at_mate_start = read_pos_at_ref_pos(read, lookup_pos, false);
310 pos_at_mate_start.saturating_sub(1)
314 } else {
315 #[expect(
319 clippy::cast_possible_wrap,
320 reason = "soft clipping lengths are small relative to isize::MAX"
321 )]
322 let leading_soft_clip = leading_soft_clipping(&ops) as isize;
323 let gap = read_alignment_start_signed - mate_unclipped_start_pos;
324 if leading_soft_clip > gap {
327 #[expect(
328 clippy::cast_sign_loss,
329 reason = "result is guaranteed positive by the if-check above"
330 )]
331 let result = (leading_soft_clip - gap) as usize;
332 result
333 } else {
334 0
335 }
336 }
337 }
338}
339
340#[must_use]
344pub fn alignment_end(read: &RecordBuf) -> Option<usize> {
345 let start = usize::from(read.alignment_start()?);
346 let ref_len = reference_length(&read.cigar());
347 Some(start + ref_len - 1)
348}
349
350#[must_use]
354pub fn parse_cigar_string(cigar_str: &str) -> Vec<(Kind, usize)> {
355 let mut ops = Vec::new();
356 let mut num_str = String::new();
357
358 for ch in cigar_str.chars() {
359 if ch.is_ascii_digit() {
360 num_str.push(ch);
361 } else {
362 let len: usize = num_str.parse().unwrap_or(0);
363 num_str.clear();
364
365 let kind = match ch {
366 'M' => Kind::Match,
367 'I' => Kind::Insertion,
368 'D' => Kind::Deletion,
369 'N' => Kind::Skip,
370 'S' => Kind::SoftClip,
371 'H' => Kind::HardClip,
372 'P' => Kind::Pad,
373 '=' => Kind::SequenceMatch,
374 'X' => Kind::SequenceMismatch,
375 _ => continue,
376 };
377
378 if len > 0 {
379 ops.push((kind, len));
380 }
381 }
382 }
383
384 ops
385}
386
387#[must_use]
389pub fn cigar_reference_length(ops: &[(Kind, usize)]) -> usize {
390 ops.iter()
391 .filter_map(|(kind, len)| match kind {
392 Kind::Match
393 | Kind::Deletion
394 | Kind::Skip
395 | Kind::SequenceMatch
396 | Kind::SequenceMismatch => Some(*len),
397 _ => None,
398 })
399 .sum()
400}
401
402#[must_use]
404pub fn leading_clipping(ops: &[(Kind, usize)]) -> usize {
405 ops.iter()
406 .take_while(|(kind, _)| matches!(kind, Kind::SoftClip | Kind::HardClip))
407 .map(|(_, len)| *len)
408 .sum()
409}
410
411#[must_use]
413pub fn trailing_clipping(ops: &[(Kind, usize)]) -> usize {
414 ops.iter()
415 .rev()
416 .take_while(|(kind, _)| matches!(kind, Kind::SoftClip | Kind::HardClip))
417 .map(|(_, len)| *len)
418 .sum()
419}
420
421#[must_use]
423pub fn leading_soft_clipping(ops: &[(Kind, usize)]) -> usize {
424 ops.iter()
425 .skip_while(|(kind, _)| *kind == Kind::HardClip)
426 .take_while(|(kind, _)| *kind == Kind::SoftClip)
427 .map(|(_, len)| *len)
428 .sum()
429}
430
431#[must_use]
433pub fn trailing_soft_clipping(ops: &[(Kind, usize)]) -> usize {
434 ops.iter()
435 .rev()
436 .skip_while(|(kind, _)| *kind == Kind::HardClip)
437 .take_while(|(kind, _)| *kind == Kind::SoftClip)
438 .map(|(_, len)| *len)
439 .sum()
440}
441
442#[must_use]
444fn cigar_to_ops(record: &RecordBuf) -> Vec<(Kind, usize)> {
445 record.cigar().as_ref().iter().map(|op| (op.kind(), op.len())).collect()
446}
447
448#[must_use]
455pub fn unclipped_start(record: &RecordBuf) -> Option<usize> {
456 if record.flags().is_unmapped() {
457 return None;
458 }
459 let start = usize::from(record.alignment_start()?);
460 let leading = leading_clipping(&cigar_to_ops(record));
461 Some(start.saturating_sub(leading))
462}
463
464#[must_use]
471pub fn unclipped_end(record: &RecordBuf) -> Option<usize> {
472 if record.flags().is_unmapped() {
473 return None;
474 }
475 let start = usize::from(record.alignment_start()?);
476 let ref_len = reference_length(&record.cigar());
477 let trailing = trailing_clipping(&cigar_to_ops(record));
478 Some(start + ref_len.saturating_sub(1) + trailing)
481}
482
483#[must_use]
492pub fn unclipped_five_prime_position(record: &RecordBuf) -> Option<usize> {
493 if record.flags().is_unmapped() {
494 return None;
495 }
496 if record.flags().is_reverse_complemented() {
497 unclipped_end(record)
498 } else {
499 unclipped_start(record)
500 }
501}
502
503#[expect(
505 clippy::redundant_closure_for_method_calls,
506 reason = "Op::len is not a method on the trait"
507)]
508#[must_use]
509pub fn reference_length(cigar: &impl CigarTrait) -> usize {
510 cigar
511 .iter()
512 .filter_map(Result::ok)
513 .filter(|op| {
514 matches!(
515 op.kind(),
516 Kind::Match
517 | Kind::SequenceMatch
518 | Kind::SequenceMismatch
519 | Kind::Deletion
520 | Kind::Skip
521 )
522 })
523 .map(|op| op.len())
524 .sum()
525}
526
527#[must_use]
530pub fn get_pair_orientation(record: &RecordBuf) -> PairOrientation {
531 let is_reverse = record.flags().is_reverse_complemented();
532 let mate_reverse = record.flags().is_mate_reverse_complemented();
533
534 if is_reverse == mate_reverse {
536 return PairOrientation::Tandem;
537 }
538
539 let alignment_start = record.alignment_start().map_or(0, usize::from);
541 let mate_start = record.mate_alignment_start().map_or(0, usize::from);
542 let insert_size = record.template_length();
543
544 #[expect(
546 clippy::cast_possible_wrap,
547 reason = "genomic positions are non-negative and fit in i64 on all supported platforms"
548 )]
549 let (positive_five_prime, negative_five_prime) = if is_reverse {
550 let ref_len = reference_length(&record.cigar());
552 let end = alignment_start + ref_len.saturating_sub(1);
553 (mate_start as i64, end as i64)
554 } else {
555 (alignment_start as i64, alignment_start as i64 + i64::from(insert_size))
557 };
558
559 if positive_five_prime < negative_five_prime {
561 PairOrientation::FR
562 } else {
563 PairOrientation::RF
564 }
565}
566
567#[must_use]
574pub fn is_fr_pair(r1: &RecordBuf, r2: &RecordBuf) -> bool {
575 let r1_flags = r1.flags();
576 let r2_flags = r2.flags();
577
578 if !r1_flags.is_segmented() || !r2_flags.is_segmented() {
580 return false;
581 }
582
583 if r1_flags.is_unmapped() || r2_flags.is_unmapped() {
585 return false;
586 }
587
588 if r1_flags.is_mate_unmapped() || r2_flags.is_mate_unmapped() {
590 return false;
591 }
592
593 let r1_ref = r1.reference_sequence_id();
595 let r2_ref = r2.reference_sequence_id();
596 if r1_ref != r2_ref {
597 return false;
598 }
599
600 let orientation = get_pair_orientation(r1);
604 orientation == PairOrientation::FR
605}
606
607#[cfg(test)]
608mod tests {
609 use super::*;
610 use crate::builder::{RecordBuilder, RecordPairBuilder};
611 use noodles::sam::alignment::record::Flags;
612
613 const FLAG_PAIRED: u16 = 0x1;
615 const FLAG_READ1: u16 = 0x40;
616 const FLAG_REVERSE: u16 = 0x10;
617 const FLAG_MATE_REVERSE: u16 = 0x20;
618 const FLAG_UNMAPPED: u16 = 0x4;
619 const FLAG_MATE_UNMAPPED: u16 = 0x8;
620
621 #[test]
626 fn test_parse_cigar_string() {
627 let ops = parse_cigar_string("10M5I20M");
628 assert_eq!(ops.len(), 3);
629 assert_eq!(ops[0], (Kind::Match, 10));
630 assert_eq!(ops[1], (Kind::Insertion, 5));
631 assert_eq!(ops[2], (Kind::Match, 20));
632 }
633
634 #[test]
635 fn test_parse_cigar_string_with_clips() {
636 let ops = parse_cigar_string("5H10S50M10S5H");
637 assert_eq!(ops.len(), 5);
638 assert_eq!(ops[0], (Kind::HardClip, 5));
639 assert_eq!(ops[1], (Kind::SoftClip, 10));
640 assert_eq!(ops[2], (Kind::Match, 50));
641 assert_eq!(ops[3], (Kind::SoftClip, 10));
642 assert_eq!(ops[4], (Kind::HardClip, 5));
643 }
644
645 #[test]
646 fn test_cigar_reference_length() {
647 let ops = parse_cigar_string("10M5I20M5D10M");
648 assert_eq!(cigar_reference_length(&ops), 45); }
650
651 #[test]
652 fn test_leading_clipping() {
653 let ops = parse_cigar_string("5H10S50M10S5H");
654 assert_eq!(leading_clipping(&ops), 15); }
656
657 #[test]
658 fn test_trailing_clipping() {
659 let ops = parse_cigar_string("5H10S50M10S5H");
660 assert_eq!(trailing_clipping(&ops), 15); }
662
663 #[test]
664 fn test_leading_soft_clipping() {
665 let ops = parse_cigar_string("5H10S50M10S5H");
666 assert_eq!(leading_soft_clipping(&ops), 10); }
668
669 #[test]
670 fn test_trailing_soft_clipping() {
671 let ops = parse_cigar_string("5H10S50M10S5H");
672 assert_eq!(trailing_soft_clipping(&ops), 10); }
674
675 fn create_fr_test_read(
681 name: &str,
682 flags: u16,
683 pos: usize,
684 mate_pos: usize,
685 tlen: i32,
686 ref_id: usize,
687 mate_ref_id: usize,
688 ) -> RecordBuf {
689 let is_reverse = (flags & FLAG_REVERSE) != 0;
690 let is_mate_reverse = (flags & FLAG_MATE_REVERSE) != 0;
691 let is_unmapped = (flags & FLAG_UNMAPPED) != 0;
692 let is_mate_unmapped = (flags & FLAG_MATE_UNMAPPED) != 0;
693 let is_paired = (flags & FLAG_PAIRED) != 0;
694 let is_read1 = (flags & FLAG_READ1) != 0;
695
696 let mut builder = RecordBuilder::new()
697 .name(name)
698 .sequence(&"A".repeat(100))
699 .qualities(&[30u8; 100])
700 .cigar("100M")
701 .reference_sequence_id(ref_id)
702 .alignment_start(pos)
703 .mate_reference_sequence_id(mate_ref_id)
704 .mate_alignment_start(mate_pos)
705 .template_length(tlen)
706 .reverse_complement(is_reverse)
707 .mate_reverse_complement(is_mate_reverse)
708 .unmapped(is_unmapped)
709 .mate_unmapped(is_mate_unmapped);
710
711 if is_paired {
712 builder = builder.first_segment(is_read1);
713 }
714
715 builder.build()
716 }
717
718 #[test]
719 fn test_is_fr_pair_true_for_fr_orientation() {
720 let read = create_fr_test_read(
724 "fr_pair",
725 FLAG_PAIRED | FLAG_READ1 | FLAG_MATE_REVERSE,
726 100,
727 200,
728 300,
729 0,
730 0,
731 );
732 assert!(is_fr_pair_from_tags(&read), "Should be FR pair");
733 }
734
735 #[test]
736 fn test_is_fr_pair_false_for_rf_orientation() {
737 let read = create_fr_test_read(
739 "rf_pair",
740 FLAG_PAIRED | FLAG_READ1 | FLAG_MATE_REVERSE,
741 63837,
742 62870,
743 -967,
744 0,
745 0,
746 );
747 assert!(!is_fr_pair_from_tags(&read), "Should NOT be FR pair: RF orientation");
748 }
749
750 #[test]
751 fn test_is_fr_pair_false_for_tandem() {
752 let read = create_fr_test_read(
754 "tandem_pair",
755 FLAG_PAIRED | FLAG_READ1, 100,
757 200,
758 300,
759 0,
760 0,
761 );
762 assert!(!is_fr_pair_from_tags(&read), "Should NOT be FR pair: tandem");
763 }
764
765 #[test]
766 fn test_is_fr_pair_false_for_unpaired() {
767 let record = RecordBuilder::new().name("unpaired").sequence("ACGT").build();
769 assert!(!is_fr_pair_from_tags(&record), "Should NOT be FR pair: unpaired");
770 }
771
772 #[test]
773 fn test_is_fr_pair_false_for_unmapped() {
774 let read = create_fr_test_read(
775 "unmapped",
776 FLAG_PAIRED | FLAG_READ1 | FLAG_UNMAPPED | FLAG_MATE_REVERSE,
777 100,
778 200,
779 300,
780 0,
781 0,
782 );
783 assert!(!is_fr_pair_from_tags(&read), "Should NOT be FR pair: unmapped");
784 }
785
786 #[test]
787 fn test_is_fr_pair_false_for_mate_unmapped() {
788 let read = create_fr_test_read(
789 "mate_unmapped",
790 FLAG_PAIRED | FLAG_READ1 | FLAG_MATE_UNMAPPED | FLAG_MATE_REVERSE,
791 100,
792 200,
793 300,
794 0,
795 0,
796 );
797 assert!(!is_fr_pair_from_tags(&read), "Should NOT be FR pair: mate unmapped");
798 }
799
800 #[test]
801 fn test_is_fr_pair_false_for_different_references() {
802 let read = create_fr_test_read(
803 "diff_ref",
804 FLAG_PAIRED | FLAG_READ1 | FLAG_MATE_REVERSE,
805 100,
806 200,
807 300,
808 0,
809 1, );
811 assert!(!is_fr_pair_from_tags(&read), "Should NOT be FR pair: different refs");
812 }
813
814 #[test]
815 fn test_is_fr_pair_false_for_rf_when_read_on_negative_strand() {
816 let read = create_fr_test_read(
824 "rf_negative_strand",
825 FLAG_PAIRED | FLAG_READ1 | FLAG_REVERSE, 200, 400, -200, 0, 0, );
832
833 assert!(
834 !is_fr_pair_from_tags(&read),
835 "Should NOT be FR pair: RF orientation (mate 5' > read 5')"
836 );
837 }
838
839 #[test]
840 fn test_is_fr_pair_true_for_fr_when_read_on_negative_strand() {
841 let read = create_fr_test_read(
848 "fr_negative_strand",
849 FLAG_PAIRED | FLAG_READ1 | FLAG_REVERSE, 200, 100, 200, 0, 0, );
856
857 assert!(is_fr_pair_from_tags(&read), "Should be FR pair: mate 5' (100) < read end (300)");
858 }
859
860 #[test]
861 fn test_is_fr_pair_false_for_tandem_both_reverse() {
862 let read = create_fr_test_read(
864 "tandem_pair_reverse",
865 FLAG_PAIRED | FLAG_READ1 | FLAG_REVERSE | FLAG_MATE_REVERSE, 100,
867 200,
868 300,
869 0,
870 0,
871 );
872
873 assert!(
874 !is_fr_pair_from_tags(&read),
875 "Should NOT be FR pair: tandem orientation (both reverse)"
876 );
877 }
878
879 fn create_cigar_test_read(name: &str, pos: usize, cigar: &str) -> RecordBuf {
886 RecordBuilder::new()
887 .name(name)
888 .cigar(cigar)
889 .reference_sequence_id(0)
890 .alignment_start(pos)
891 .first_segment(true)
892 .build()
893 }
894
895 #[test]
896 fn test_read_pos_at_ref_pos_simple_match() {
897 let read = create_cigar_test_read("simple", 100, "50M");
899
900 assert_eq!(read_pos_at_ref_pos(&read, 100, false), 1);
901 assert_eq!(read_pos_at_ref_pos(&read, 110, false), 11);
902 assert_eq!(read_pos_at_ref_pos(&read, 149, false), 50);
903 assert_eq!(read_pos_at_ref_pos(&read, 150, false), 0); assert_eq!(read_pos_at_ref_pos(&read, 99, false), 0); }
906
907 #[test]
908 fn test_read_pos_at_ref_pos_with_insertion() {
909 let read = create_cigar_test_read("insertion", 100, "10M5I10M");
911
912 assert_eq!(read_pos_at_ref_pos(&read, 105, false), 6);
913 assert_eq!(read_pos_at_ref_pos(&read, 115, false), 21); }
915
916 #[test]
917 fn test_read_pos_at_ref_pos_with_deletion() {
918 let read = create_cigar_test_read("deletion", 100, "10M5D10M");
920
921 assert_eq!(read_pos_at_ref_pos(&read, 112, false), 0);
923 assert_eq!(read_pos_at_ref_pos(&read, 112, true), 10); assert_eq!(read_pos_at_ref_pos(&read, 115, false), 11); }
926
927 #[test]
928 fn test_read_pos_at_ref_pos_with_soft_clip() {
929 let read = create_cigar_test_read("softclip", 100, "5S10M");
931
932 assert_eq!(read_pos_at_ref_pos(&read, 100, false), 6); assert_eq!(read_pos_at_ref_pos(&read, 105, false), 11);
934 }
935
936 fn create_mc_test_read(name: &str, mate_pos: usize, mc_cigar: &str) -> RecordBuf {
942 RecordBuilder::new()
943 .name(name)
944 .sequence(&"A".repeat(50))
945 .qualities(&[30u8; 50])
946 .cigar("50M")
947 .reference_sequence_id(0)
948 .alignment_start(100)
949 .mate_reference_sequence_id(0)
950 .mate_alignment_start(mate_pos)
951 .first_segment(true)
952 .tag("MC", mc_cigar)
953 .build()
954 }
955
956 #[test]
957 fn test_mate_unclipped_start_no_clipping() {
958 let read = create_mc_test_read("no_clip", 200, "50M");
959 assert_eq!(mate_unclipped_start(&read), Some(200));
960 }
961
962 #[test]
963 fn test_mate_unclipped_start_with_soft_clip() {
964 let read = create_mc_test_read("soft_clip", 200, "10S50M");
966 assert_eq!(mate_unclipped_start(&read), Some(190));
967 }
968
969 #[test]
970 fn test_mate_unclipped_start_with_hard_clip() {
971 let read = create_mc_test_read("hard_clip", 200, "5H10S50M");
973 assert_eq!(mate_unclipped_start(&read), Some(185));
974 }
975
976 #[test]
977 fn test_mate_unclipped_end_no_clipping() {
978 let read = create_mc_test_read("no_clip", 200, "50M");
980 assert_eq!(mate_unclipped_end(&read), Some(249));
981 }
982
983 #[test]
984 fn test_mate_unclipped_end_with_soft_clip() {
985 let read = create_mc_test_read("soft_clip", 200, "50M10S");
987 assert_eq!(mate_unclipped_end(&read), Some(259));
988 }
989
990 #[test]
991 fn test_mate_unclipped_end_with_deletion() {
992 let read = create_mc_test_read("deletion", 200, "25M5D25M");
994 assert_eq!(mate_unclipped_end(&read), Some(254));
995 }
996
997 #[test]
998 fn test_mate_unclipped_end_with_trailing_hard_clip() {
999 let read = create_mc_test_read("hard_clip", 200, "50M5S3H");
1001 assert_eq!(mate_unclipped_end(&read), Some(257));
1002 }
1003
1004 #[test]
1005 fn test_mate_unclipped_no_mc_tag() {
1006 let record = RecordBuilder::new()
1008 .name("no_mc")
1009 .sequence("ACGT")
1010 .reference_sequence_id(0)
1011 .alignment_start(100)
1012 .mate_reference_sequence_id(0)
1013 .mate_alignment_start(200)
1014 .first_segment(true)
1015 .build();
1016
1017 assert_eq!(mate_unclipped_start(&record), None);
1018 assert_eq!(mate_unclipped_end(&record), None);
1019 }
1020
1021 #[test]
1022 fn test_mate_unclipped_invalid_utf8_mc_tag() {
1023 use noodles::sam::alignment::record_buf::data::field::value::Value;
1024 let mc_tag = Tag::from([b'M', b'C']);
1025 let mut record = create_mc_test_read("invalid_utf8", 200, "50M");
1027 record.data_mut().insert(mc_tag, Value::String(vec![0xFF, 0xFE].into()));
1028
1029 assert_eq!(mate_unclipped_start(&record), None);
1030 assert_eq!(mate_unclipped_end(&record), None);
1031 }
1032
1033 #[test]
1034 fn test_mate_unclipped_empty_cigar_mc_tag() {
1035 let record = create_mc_test_read("empty_cigar", 200, "");
1036 assert_eq!(mate_unclipped_start(&record), None);
1037 assert_eq!(mate_unclipped_end(&record), None);
1038 }
1039
1040 #[test]
1045 fn test_get_pair_orientation_fr_normal() {
1046 let read = create_fr_test_read(
1050 "fr_normal",
1051 FLAG_PAIRED | FLAG_READ1 | FLAG_MATE_REVERSE,
1052 100,
1053 150,
1054 200,
1055 0,
1056 0,
1057 );
1058 assert_eq!(get_pair_orientation(&read), PairOrientation::FR);
1059 }
1060
1061 #[test]
1062 fn test_get_pair_orientation_fr_overlapping() {
1063 let read = create_fr_test_read(
1065 "fr_overlap",
1066 FLAG_PAIRED | FLAG_READ1 | FLAG_MATE_REVERSE,
1067 100,
1068 120,
1069 100,
1070 0,
1071 0,
1072 );
1073 assert_eq!(get_pair_orientation(&read), PairOrientation::FR);
1074 }
1075
1076 #[test]
1077 fn test_get_pair_orientation_rf_outie() {
1078 let read = create_fr_test_read(
1082 "rf_outie",
1083 FLAG_PAIRED | FLAG_READ1 | FLAG_MATE_REVERSE,
1084 200,
1085 100,
1086 -100,
1087 0,
1088 0,
1089 );
1090 assert_eq!(get_pair_orientation(&read), PairOrientation::RF);
1091 }
1092
1093 #[test]
1094 fn test_get_pair_orientation_tandem_both_forward() {
1095 let read = create_fr_test_read(
1097 "tandem_fwd",
1098 FLAG_PAIRED | FLAG_READ1, 100,
1100 200,
1101 200,
1102 0,
1103 0,
1104 );
1105 assert_eq!(get_pair_orientation(&read), PairOrientation::Tandem);
1106 }
1107
1108 #[test]
1109 fn test_get_pair_orientation_tandem_both_reverse() {
1110 let read = create_fr_test_read(
1112 "tandem_rev",
1113 FLAG_PAIRED | FLAG_READ1 | FLAG_REVERSE | FLAG_MATE_REVERSE,
1114 100,
1115 200,
1116 200,
1117 0,
1118 0,
1119 );
1120 assert_eq!(get_pair_orientation(&read), PairOrientation::Tandem);
1121 }
1122
1123 #[test]
1124 fn test_get_pair_orientation_fr_from_reverse_strand() {
1125 let read = create_fr_test_read(
1131 "fr_reverse",
1132 FLAG_PAIRED | FLAG_READ1 | FLAG_REVERSE, 200,
1134 100,
1135 200,
1136 0,
1137 0,
1138 );
1139 assert_eq!(get_pair_orientation(&read), PairOrientation::FR);
1140 }
1141
1142 #[test]
1143 fn test_get_pair_orientation_rf_from_reverse_strand() {
1144 let read = create_fr_test_read(
1150 "rf_reverse",
1151 FLAG_PAIRED | FLAG_READ1 | FLAG_REVERSE, 200,
1153 400,
1154 -200,
1155 0,
1156 0,
1157 );
1158 assert_eq!(get_pair_orientation(&read), PairOrientation::RF);
1159 }
1160
1161 fn create_read_pair(
1167 r1_pos: usize,
1168 r1_reverse: bool,
1169 r2_pos: usize,
1170 r2_reverse: bool,
1171 same_ref: bool,
1172 ) -> (RecordBuf, RecordBuf) {
1173 let mut builder = RecordPairBuilder::new()
1174 .name("read_pair")
1175 .r1_sequence(&"A".repeat(100))
1176 .r2_sequence(&"A".repeat(100))
1177 .r1_start(r1_pos)
1178 .r2_start(r2_pos)
1179 .r1_reverse(r1_reverse)
1180 .r2_reverse(r2_reverse);
1181
1182 if !same_ref {
1183 builder = builder.r2_reference_sequence_id(1);
1184 }
1185
1186 builder.build()
1187 }
1188
1189 #[test]
1190 fn test_is_fr_pair_two_records_fr() {
1191 let (r1, r2) = create_read_pair(100, false, 200, true, true);
1193 assert!(is_fr_pair(&r1, &r2), "Should detect FR pair");
1194 }
1195
1196 #[test]
1197 fn test_is_fr_pair_two_records_rf() {
1198 let (r1, r2) = create_read_pair(200, false, 100, true, true);
1201 let mut r1_rf = r1;
1204 *r1_rf.template_length_mut() = -50;
1205 assert!(!is_fr_pair(&r1_rf, &r2), "Should NOT detect FR pair for RF orientation");
1206 }
1207
1208 #[test]
1209 fn test_is_fr_pair_two_records_tandem() {
1210 let (r1, r2) = create_read_pair(100, false, 200, false, true);
1212 assert!(!is_fr_pair(&r1, &r2), "Should NOT detect FR pair for tandem");
1213 }
1214
1215 #[test]
1216 fn test_is_fr_pair_two_records_different_refs() {
1217 let (r1, r2) = create_read_pair(100, false, 200, true, false);
1219 assert!(!is_fr_pair(&r1, &r2), "Should NOT detect FR pair for different refs");
1220 }
1221
1222 #[test]
1223 fn test_is_fr_pair_two_records_unpaired() {
1224 let r1 = RecordBuilder::new()
1226 .name("unpaired")
1227 .sequence("ACGT")
1228 .reference_sequence_id(0)
1229 .alignment_start(100)
1230 .build();
1231
1232 let r2 = RecordBuilder::new()
1233 .name("unpaired")
1234 .sequence("ACGT")
1235 .reference_sequence_id(0)
1236 .alignment_start(200)
1237 .build();
1238
1239 assert!(!is_fr_pair(&r1, &r2), "Should NOT detect FR pair for unpaired reads");
1240 }
1241
1242 #[test]
1243 fn test_is_fr_pair_two_records_unmapped() {
1244 let (mut r1, r2) = create_read_pair(100, false, 200, true, true);
1246 *r1.flags_mut() = Flags::from(FLAG_PAIRED | FLAG_READ1 | FLAG_UNMAPPED | FLAG_MATE_REVERSE);
1247 assert!(!is_fr_pair(&r1, &r2), "Should NOT detect FR pair for unmapped read");
1248 }
1249
1250 #[test]
1255 fn test_parse_cigar_string_empty() {
1256 let ops = parse_cigar_string("");
1257 assert!(ops.is_empty());
1258 }
1259
1260 #[test]
1261 fn test_parse_cigar_string_all_operations() {
1262 let ops = parse_cigar_string("10M5I3D2N7S4H1P6=8X");
1264 assert_eq!(ops.len(), 9);
1265 assert_eq!(ops[0], (Kind::Match, 10));
1266 assert_eq!(ops[1], (Kind::Insertion, 5));
1267 assert_eq!(ops[2], (Kind::Deletion, 3));
1268 assert_eq!(ops[3], (Kind::Skip, 2));
1269 assert_eq!(ops[4], (Kind::SoftClip, 7));
1270 assert_eq!(ops[5], (Kind::HardClip, 4));
1271 assert_eq!(ops[6], (Kind::Pad, 1));
1272 assert_eq!(ops[7], (Kind::SequenceMatch, 6));
1273 assert_eq!(ops[8], (Kind::SequenceMismatch, 8));
1274 }
1275
1276 #[test]
1277 fn test_cigar_reference_length_with_all_ops() {
1278 let ops = parse_cigar_string("10M5I3D2N7S4H1P6=8X");
1281 assert_eq!(cigar_reference_length(&ops), 29);
1283 }
1284
1285 #[test]
1286 fn test_leading_clipping_only_hard_clip() {
1287 let ops = parse_cigar_string("10H50M");
1288 assert_eq!(leading_clipping(&ops), 10);
1289 assert_eq!(leading_soft_clipping(&ops), 0);
1290 }
1291
1292 #[test]
1293 fn test_trailing_clipping_only_hard_clip() {
1294 let ops = parse_cigar_string("50M10H");
1295 assert_eq!(trailing_clipping(&ops), 10);
1296 assert_eq!(trailing_soft_clipping(&ops), 0);
1297 }
1298
1299 #[test]
1300 fn test_alignment_end_simple() {
1301 let record = RecordBuilder::new()
1302 .sequence(&"A".repeat(50))
1303 .alignment_start(100)
1304 .cigar("50M")
1305 .build();
1306 assert_eq!(alignment_end(&record), Some(149));
1308 }
1309
1310 #[test]
1311 fn test_alignment_end_with_deletion() {
1312 let record = RecordBuilder::new()
1313 .sequence(&"A".repeat(50))
1314 .alignment_start(100)
1315 .cigar("25M5D25M")
1316 .build();
1317 assert_eq!(alignment_end(&record), Some(154));
1319 }
1320
1321 #[test]
1322 fn test_alignment_end_no_alignment_start() {
1323 let record = RecordBuilder::new().sequence("ACGT").build();
1325 assert_eq!(alignment_end(&record), None);
1326 }
1327
1328 #[test]
1329 fn test_read_pos_at_ref_pos_complex_cigar() {
1330 let read = create_cigar_test_read("skip", 100, "10M5N10M");
1332
1333 assert_eq!(read_pos_at_ref_pos(&read, 105, false), 6);
1335 assert_eq!(read_pos_at_ref_pos(&read, 112, false), 0);
1337 assert_eq!(read_pos_at_ref_pos(&read, 115, false), 11);
1339 }
1340
1341 #[test]
1342 fn test_read_pos_at_ref_pos_with_sequence_match_mismatch() {
1343 let read = create_cigar_test_read("eq_x", 100, "5=3X5=");
1345
1346 assert_eq!(read_pos_at_ref_pos(&read, 100, false), 1);
1347 assert_eq!(read_pos_at_ref_pos(&read, 105, false), 6); assert_eq!(read_pos_at_ref_pos(&read, 108, false), 9); }
1350
1351 #[test]
1357 fn test_unclipped_start_no_clips() {
1358 let read = create_cigar_test_read("simple", 100, "50M");
1360 assert_eq!(unclipped_start(&read), Some(100));
1361 }
1362
1363 #[test]
1364 fn test_unclipped_start_with_leading_soft_clip() {
1365 let read = create_cigar_test_read("soft", 100, "5S45M");
1367 assert_eq!(unclipped_start(&read), Some(95));
1368 }
1369
1370 #[test]
1371 fn test_unclipped_start_with_leading_hard_clip() {
1372 let read = create_cigar_test_read("hard", 100, "10H50M");
1374 assert_eq!(unclipped_start(&read), Some(90));
1375 }
1376
1377 #[test]
1378 fn test_unclipped_start_with_soft_and_hard_clips() {
1379 let read = create_cigar_test_read("both", 10, "5S45M10H");
1382 assert_eq!(unclipped_start(&read), Some(5));
1383 }
1384
1385 #[test]
1386 fn test_unclipped_start_with_leading_hard_and_soft() {
1387 let read = create_cigar_test_read("hard_soft", 100, "3H5S42M");
1389 assert_eq!(unclipped_start(&read), Some(92));
1390 }
1391
1392 #[test]
1393 fn test_unclipped_end_no_clips() {
1394 let read = create_cigar_test_read("simple", 100, "50M");
1396 assert_eq!(unclipped_end(&read), Some(149));
1397 }
1398
1399 #[test]
1400 fn test_unclipped_end_with_trailing_soft_clip() {
1401 let read = create_cigar_test_read("soft", 100, "45M5S");
1403 assert_eq!(unclipped_end(&read), Some(149));
1404 }
1405
1406 #[test]
1407 fn test_unclipped_end_with_trailing_hard_clip() {
1408 let read = create_cigar_test_read("hard", 100, "50M10H");
1410 assert_eq!(unclipped_end(&read), Some(159));
1411 }
1412
1413 #[test]
1414 fn test_unclipped_end_with_soft_and_hard_clips() {
1415 let read = create_cigar_test_read("both", 10, "5S45M10H");
1419 assert_eq!(unclipped_end(&read), Some(64));
1420 }
1421
1422 #[test]
1423 fn test_unclipped_end_with_trailing_soft_and_hard() {
1424 let read = create_cigar_test_read("soft_hard", 100, "42M5S3H");
1426 assert_eq!(unclipped_end(&read), Some(149));
1427 }
1428
1429 #[test]
1430 fn test_unclipped_five_prime_forward_strand() {
1431 let read = create_cigar_test_read("fwd", 10, "5S45M10H");
1434 assert_eq!(unclipped_five_prime_position(&read), Some(5));
1435 }
1436
1437 #[test]
1438 fn test_unclipped_five_prime_reverse_strand() {
1439 let read = RecordBuilder::new()
1442 .name("rev")
1443 .sequence(&"A".repeat(60))
1444 .reference_sequence_id(0)
1445 .alignment_start(10)
1446 .cigar("5S45M10H")
1447 .flags(Flags::REVERSE_COMPLEMENTED)
1448 .build();
1449 assert_eq!(unclipped_five_prime_position(&read), Some(64));
1450 }
1451
1452 #[test]
1453 fn test_unclipped_positions_unmapped_returns_none() {
1454 let read =
1455 RecordBuilder::new().name("unmapped").sequence("ACGT").flags(Flags::UNMAPPED).build();
1456 assert_eq!(unclipped_start(&read), None);
1457 assert_eq!(unclipped_end(&read), None);
1458 assert_eq!(unclipped_five_prime_position(&read), None);
1459 }
1460
1461 #[test]
1462 fn test_unclipped_end_with_deletion() {
1463 let read = create_cigar_test_read("del", 100, "25M5D25M");
1467 assert_eq!(unclipped_end(&read), Some(154));
1468 }
1469
1470 #[test]
1471 fn test_unclipped_end_with_insertion() {
1472 let read = create_cigar_test_read("ins", 100, "25M5I25M");
1476 assert_eq!(unclipped_end(&read), Some(149));
1477 }
1478}