1use anyhow::{Context, Result};
8use noodles::sam::alignment::record::Cigar;
9use noodles::sam::alignment::record::cigar::op::Kind;
10use noodles::sam::alignment::record_buf::RecordBuf;
11
12use crate::phred::{MIN_PHRED, NO_CALL_BASE, NO_CALL_BASE_LOWER};
13use fgumi_raw_bam;
14
15#[inline]
18fn is_no_call(base: u8) -> bool {
19 matches!(base, NO_CALL_BASE | NO_CALL_BASE_LOWER | b'.')
20}
21
22#[derive(Debug, Clone, Copy, PartialEq, Eq)]
24pub enum AgreementStrategy {
25 Consensus,
27 MaxQual,
29 PassThrough,
31}
32
33#[derive(Debug, Clone, Copy, PartialEq, Eq)]
35pub enum DisagreementStrategy {
36 Consensus,
38 MaskBoth,
40 MaskLowerQual,
42}
43
44#[derive(Debug, Default, Clone)]
46pub struct CorrectionStats {
47 pub overlapping_bases: usize,
49 pub bases_agreeing: usize,
51 pub bases_disagreeing: usize,
53 pub bases_corrected: usize,
55}
56
57impl CorrectionStats {
58 #[must_use]
60 pub fn new() -> Self {
61 Self::default()
62 }
63
64 pub fn reset(&mut self) {
66 self.overlapping_bases = 0;
67 self.bases_agreeing = 0;
68 self.bases_disagreeing = 0;
69 self.bases_corrected = 0;
70 }
71
72 pub fn merge(&mut self, other: &Self) {
74 self.overlapping_bases += other.overlapping_bases;
75 self.bases_agreeing += other.bases_agreeing;
76 self.bases_disagreeing += other.bases_disagreeing;
77 self.bases_corrected += other.bases_corrected;
78 }
79}
80
81pub struct OverlappingBasesConsensusCaller {
83 agreement_strategy: AgreementStrategy,
84 disagreement_strategy: DisagreementStrategy,
85 stats: CorrectionStats,
86}
87
88impl OverlappingBasesConsensusCaller {
89 #[must_use]
95 pub fn new(
96 agreement_strategy: AgreementStrategy,
97 disagreement_strategy: DisagreementStrategy,
98 ) -> Self {
99 Self { agreement_strategy, disagreement_strategy, stats: CorrectionStats::new() }
100 }
101
102 #[must_use]
104 pub fn stats(&self) -> &CorrectionStats {
105 &self.stats
106 }
107
108 pub fn reset_stats(&mut self) {
110 self.stats.reset();
111 }
112
113 pub fn call(&mut self, r1: &mut RecordBuf, r2: &mut RecordBuf) -> Result<bool> {
134 if r1.flags().is_unmapped() || r2.flags().is_unmapped() {
136 return Ok(false);
137 }
138
139 if r1.reference_sequence_id() != r2.reference_sequence_id() {
141 return Ok(false);
142 }
143
144 if r1.alignment_start().is_none()
146 || r1.alignment_end().is_none()
147 || r2.alignment_start().is_none()
148 || r2.alignment_end().is_none()
149 {
150 return Ok(false);
151 }
152
153 let Ok(overlap_iter) = ReadMateAndRefPosIterator::new(r1, r2) else { return Ok(false) };
155
156 let overlapping_positions: Vec<_> = overlap_iter.collect();
158
159 if overlapping_positions.is_empty() {
160 return Ok(false);
161 }
162
163 let mut r1_seq: Vec<u8> = r1.sequence().as_ref().to_vec();
165 let mut r2_seq: Vec<u8> = r2.sequence().as_ref().to_vec();
166 let mut r1_quals: Vec<u8> = r1.quality_scores().as_ref().to_vec();
167 let mut r2_quals: Vec<u8> = r2.quality_scores().as_ref().to_vec();
168 let mut modified = false;
169
170 for pos in overlapping_positions {
172 let r1_base = r1_seq[pos.read_offset];
173 let r2_base = r2_seq[pos.mate_offset];
174
175 if is_no_call(r1_base) || is_no_call(r2_base) {
177 continue;
178 }
179
180 self.stats.overlapping_bases += 1;
181
182 let r1_qual = r1_quals[pos.read_offset];
183 let r2_qual = r2_quals[pos.mate_offset];
184
185 if r1_base == r2_base {
186 self.stats.bases_agreeing += 1;
188 if self.process_agreement(
189 pos.read_offset,
190 pos.mate_offset,
191 r1_qual,
192 r2_qual,
193 &mut r1_quals,
194 &mut r2_quals,
195 ) {
196 modified = true;
197 }
198 } else {
199 self.stats.bases_disagreeing += 1;
201 self.process_disagreement(
202 pos.read_offset,
203 pos.mate_offset,
204 r1_base,
205 r2_base,
206 r1_qual,
207 r2_qual,
208 &mut r1_seq,
209 &mut r2_seq,
210 &mut r1_quals,
211 &mut r2_quals,
212 );
213 modified = true;
214 }
215 }
216
217 if modified {
219 *r1.sequence_mut() = r1_seq.into();
220 *r2.sequence_mut() = r2_seq.into();
221 *r1.quality_scores_mut() = r1_quals.into();
222 *r2.quality_scores_mut() = r2_quals.into();
223 }
224
225 Ok(true)
226 }
227
228 fn process_agreement(
230 &mut self,
231 r1_offset: usize,
232 r2_offset: usize,
233 r1_qual: u8,
234 r2_qual: u8,
235 r1_quals: &mut [u8],
236 r2_quals: &mut [u8],
237 ) -> bool {
238 match self.agreement_strategy {
239 AgreementStrategy::Consensus => {
240 let new_qual = (u16::from(r1_qual) + u16::from(r2_qual)).min(93) as u8;
242 r1_quals[r1_offset] = new_qual;
243 r2_quals[r2_offset] = new_qual;
244
245 if new_qual != r1_qual || new_qual != r2_qual {
246 self.stats.bases_corrected += 1;
247 true
248 } else {
249 false
250 }
251 }
252 AgreementStrategy::MaxQual => {
253 let new_qual = r1_qual.max(r2_qual);
255 r1_quals[r1_offset] = new_qual;
256 r2_quals[r2_offset] = new_qual;
257
258 if new_qual != r1_qual || new_qual != r2_qual {
259 self.stats.bases_corrected += 1;
260 true
261 } else {
262 false
263 }
264 }
265 AgreementStrategy::PassThrough => false,
266 }
267 }
268
269 #[expect(
271 clippy::too_many_arguments,
272 reason = "disagreement processing requires all base/quality parameters from both reads"
273 )]
274 fn process_disagreement(
275 &mut self,
276 r1_offset: usize,
277 r2_offset: usize,
278 r1_base: u8,
279 r2_base: u8,
280 r1_qual: u8,
281 r2_qual: u8,
282 r1_seq: &mut [u8],
283 r2_seq: &mut [u8],
284 r1_quals: &mut [u8],
285 r2_quals: &mut [u8],
286 ) {
287 match self.disagreement_strategy {
288 DisagreementStrategy::Consensus => {
289 let (consensus_base, consensus_qual) = match r1_qual.cmp(&r2_qual) {
292 std::cmp::Ordering::Equal => (NO_CALL_BASE, MIN_PHRED),
293 std::cmp::Ordering::Greater => {
294 (r1_base, r1_qual.saturating_sub(r2_qual).max(MIN_PHRED))
295 }
296 std::cmp::Ordering::Less => {
297 (r2_base, r2_qual.saturating_sub(r1_qual).max(MIN_PHRED))
298 }
299 };
300
301 r1_seq[r1_offset] = consensus_base;
302 r2_seq[r2_offset] = consensus_base;
303 r1_quals[r1_offset] = consensus_qual;
304 r2_quals[r2_offset] = consensus_qual;
305
306 self.stats.bases_corrected += 2;
307 }
308 DisagreementStrategy::MaskBoth => {
309 r1_seq[r1_offset] = NO_CALL_BASE;
311 r2_seq[r2_offset] = NO_CALL_BASE;
312 r1_quals[r1_offset] = MIN_PHRED;
313 r2_quals[r2_offset] = MIN_PHRED;
314
315 self.stats.bases_corrected += 2;
316 }
317 DisagreementStrategy::MaskLowerQual => {
318 match r1_qual.cmp(&r2_qual) {
320 std::cmp::Ordering::Less => {
321 r1_seq[r1_offset] = NO_CALL_BASE;
322 r1_quals[r1_offset] = MIN_PHRED;
323 self.stats.bases_corrected += 1;
324 }
325 std::cmp::Ordering::Greater => {
326 r2_seq[r2_offset] = NO_CALL_BASE;
327 r2_quals[r2_offset] = MIN_PHRED;
328 self.stats.bases_corrected += 1;
329 }
330 std::cmp::Ordering::Equal => {
331 r1_seq[r1_offset] = NO_CALL_BASE;
333 r2_seq[r2_offset] = NO_CALL_BASE;
334 r1_quals[r1_offset] = MIN_PHRED;
335 r2_quals[r2_offset] = MIN_PHRED;
336
337 self.stats.bases_corrected += 2;
338 }
339 }
340 }
341 }
342 }
343}
344
345#[derive(Debug, Clone, Copy)]
347struct ReadPosition {
348 read_offset: usize,
350 ref_pos: i32,
352}
353
354#[derive(Debug, Clone, Copy)]
356struct ReadMateAndRefPos {
357 read_offset: usize,
359 mate_offset: usize,
361}
362
363fn kind_to_bam_op(kind: Kind) -> u8 {
365 match kind {
366 Kind::Match => 0,
367 Kind::Insertion => 1,
368 Kind::Deletion => 2,
369 Kind::Skip => 3,
370 Kind::SoftClip => 4,
371 Kind::HardClip => 5,
372 Kind::Pad => 6,
373 Kind::SequenceMatch => 7,
374 Kind::SequenceMismatch => 8,
375 }
376}
377
378struct ReadAndRefPosIterator {
388 cur_read_pos: i32,
390 cur_ref_pos: i32,
392 cigar_ops: Vec<(u8, usize)>,
394 element_index: usize,
396 in_elem_offset: usize,
398 start_ref_pos: i32,
400 end_ref_pos: i32,
402 start_read_pos: i32,
404 end_read_pos: i32,
406}
407
408#[expect(
409 clippy::cast_possible_truncation,
410 clippy::cast_possible_wrap,
411 clippy::cast_sign_loss,
412 reason = "position arithmetic requires casts between BAM integer types"
413)]
414impl ReadAndRefPosIterator {
415 fn new_with_mate(record: &RecordBuf, mate: &RecordBuf) -> Result<Self> {
417 let rec_start = match record.alignment_start() {
418 Some(pos) => usize::from(pos) as i32,
419 None => return Err(anyhow::anyhow!("Record has no alignment start")),
420 };
421 let rec_end = match record.alignment_end() {
422 Some(pos) => usize::from(pos) as i32,
423 None => return Err(anyhow::anyhow!("Record has no alignment end")),
424 };
425
426 let mate_start = match mate.alignment_start() {
427 Some(pos) => usize::from(pos) as i32,
428 None => return Err(anyhow::anyhow!("Mate has no alignment start")),
429 };
430 let mate_end = match mate.alignment_end() {
431 Some(pos) => usize::from(pos) as i32,
432 None => return Err(anyhow::anyhow!("Mate has no alignment end")),
433 };
434
435 let min_ref_pos = rec_start.max(mate_start);
436 let max_ref_pos = rec_end.min(mate_end);
437 let rec_len = record.sequence().len() as i32;
438
439 let cigar = record.cigar();
440 let cigar_ops: Vec<_> = cigar
441 .iter()
442 .filter_map(std::result::Result::ok)
443 .map(|op| (kind_to_bam_op(op.kind()), op.len()))
444 .collect();
445
446 let start_read_pos = 1i32;
447 let end_read_pos = rec_len;
448 let start_ref_pos = rec_start.max(min_ref_pos);
449 let end_ref_pos = rec_end.min(max_ref_pos);
450
451 let mut iter = Self {
452 cur_read_pos: 1,
453 cur_ref_pos: rec_start,
454 cigar_ops,
455 element_index: 0,
456 in_elem_offset: 0,
457 start_ref_pos,
458 end_ref_pos,
459 start_read_pos,
460 end_read_pos,
461 };
462
463 iter.skip_to_start();
464 Ok(iter)
465 }
466
467 fn new_raw_with_mate(
469 bam: &[u8],
470 rec_start: usize,
471 rec_end: usize,
472 mate_start: usize,
473 mate_end: usize,
474 ) -> Self {
475 let rec_start_i32 = rec_start as i32;
476 let rec_end_i32 = rec_end as i32;
477 let rec_len = fgumi_raw_bam::l_seq(bam) as i32;
478
479 let min_ref_pos = rec_start_i32.max(mate_start as i32);
480 let max_ref_pos = rec_end_i32.min(mate_end as i32);
481
482 let raw_ops = fgumi_raw_bam::get_cigar_ops(bam);
483 let cigar_ops: Vec<(u8, usize)> =
484 raw_ops.iter().map(|&op| ((op & 0xF) as u8, (op >> 4) as usize)).collect();
485
486 let start_read_pos = 1i32;
487 let end_read_pos = rec_len;
488 let start_ref_pos = rec_start_i32.max(min_ref_pos);
489 let end_ref_pos = rec_end_i32.min(max_ref_pos);
490
491 let mut iter = Self {
492 cur_read_pos: 1,
493 cur_ref_pos: rec_start_i32,
494 cigar_ops,
495 element_index: 0,
496 in_elem_offset: 0,
497 start_ref_pos,
498 end_ref_pos,
499 start_read_pos,
500 end_read_pos,
501 };
502
503 iter.skip_to_start();
504 iter
505 }
506
507 fn cur_elem_length_on_target(&self) -> usize {
509 if self.element_index >= self.cigar_ops.len() {
510 return 0;
511 }
512 let (op_type, len) = self.cigar_ops[self.element_index];
513 if matches!(op_type, 0 | 2 | 3 | 7 | 8) { len } else { 0 }
515 }
516
517 fn cur_elem_length_on_query(&self) -> usize {
519 if self.element_index >= self.cigar_ops.len() {
520 return 0;
521 }
522 let (op_type, len) = self.cigar_ops[self.element_index];
523 if matches!(op_type, 0 | 1 | 4 | 7 | 8) { len } else { 0 }
525 }
526
527 fn cur_elem_is_alignment(&self) -> bool {
529 if self.element_index >= self.cigar_ops.len() {
530 return false;
531 }
532 let (op_type, _) = self.cigar_ops[self.element_index];
533 matches!(op_type, 0 | 7 | 8)
535 }
536
537 fn skip_to_start(&mut self) {
539 while self.element_index < self.cigar_ops.len() {
540 let cur_ref_end = self.cur_ref_pos + self.cur_elem_length_on_target() as i32 - 1;
541 let cur_read_end = self.cur_read_pos + self.cur_elem_length_on_query() as i32 - 1;
542
543 if cur_ref_end >= self.start_ref_pos && cur_read_end >= self.start_read_pos {
544 break;
545 }
546
547 self.cur_ref_pos += self.cur_elem_length_on_target() as i32;
548 self.cur_read_pos += self.cur_elem_length_on_query() as i32;
549 self.element_index += 1;
550 }
551
552 self.skip_non_aligned_and_adjust_offset();
553 }
554
555 fn skip_non_aligned_and_adjust_offset(&mut self) {
557 self.in_elem_offset = 0;
558
559 while self.element_index < self.cigar_ops.len() && !self.cur_elem_is_alignment() {
560 self.cur_ref_pos += self.cur_elem_length_on_target() as i32;
561 self.cur_read_pos += self.cur_elem_length_on_query() as i32;
562 self.element_index += 1;
563 }
564
565 if self.element_index < self.cigar_ops.len()
566 && (self.cur_ref_pos < self.start_ref_pos || self.cur_read_pos < self.start_read_pos)
567 {
568 let offset = (self.start_ref_pos - self.cur_ref_pos)
569 .max(self.start_read_pos - self.cur_read_pos);
570 self.in_elem_offset = offset as usize;
571 self.cur_ref_pos += offset;
572 self.cur_read_pos += offset;
573 }
574 }
575}
576
577#[expect(
578 clippy::cast_sign_loss,
579 reason = "position arithmetic requires casts between BAM integer types"
580)]
581impl Iterator for ReadAndRefPosIterator {
582 type Item = ReadPosition;
583
584 fn next(&mut self) -> Option<Self::Item> {
585 if self.element_index < self.cigar_ops.len() {
586 let (_, len) = self.cigar_ops[self.element_index];
587 if self.in_elem_offset >= len {
588 self.element_index += 1;
589 self.skip_non_aligned_and_adjust_offset();
590 }
591 }
592
593 if self.element_index >= self.cigar_ops.len()
594 || self.cur_read_pos > self.end_read_pos
595 || self.cur_ref_pos > self.end_ref_pos
596 {
597 return None;
598 }
599
600 let pos = ReadPosition {
601 read_offset: (self.cur_read_pos - 1) as usize,
602 ref_pos: self.cur_ref_pos,
603 };
604
605 self.cur_read_pos += 1;
606 self.cur_ref_pos += 1;
607 self.in_elem_offset += 1;
608
609 Some(pos)
610 }
611}
612
613struct ReadMateAndRefPosIterator {
621 rec_iter: std::iter::Peekable<ReadAndRefPosIterator>,
622 mate_iter: std::iter::Peekable<ReadAndRefPosIterator>,
623}
624
625impl ReadMateAndRefPosIterator {
626 fn new(record: &RecordBuf, mate: &RecordBuf) -> Result<Self> {
628 let rec_iter = ReadAndRefPosIterator::new_with_mate(record, mate)?.peekable();
629 let mate_iter = ReadAndRefPosIterator::new_with_mate(mate, record)?.peekable();
630
631 Ok(Self { rec_iter, mate_iter })
632 }
633
634 fn new_raw(
636 r1: &[u8],
637 r2: &[u8],
638 r1_start: usize,
639 r1_end: usize,
640 r2_start: usize,
641 r2_end: usize,
642 ) -> Self {
643 let rec_iter =
644 ReadAndRefPosIterator::new_raw_with_mate(r1, r1_start, r1_end, r2_start, r2_end)
645 .peekable();
646 let mate_iter =
647 ReadAndRefPosIterator::new_raw_with_mate(r2, r2_start, r2_end, r1_start, r1_end)
648 .peekable();
649
650 Self { rec_iter, mate_iter }
651 }
652}
653
654impl Iterator for ReadMateAndRefPosIterator {
655 type Item = ReadMateAndRefPos;
656
657 fn next(&mut self) -> Option<Self::Item> {
658 use std::cmp::Ordering;
659
660 loop {
661 let rec_pos = self.rec_iter.peek()?;
662 let mate_pos = self.mate_iter.peek()?;
663
664 match rec_pos.ref_pos.cmp(&mate_pos.ref_pos) {
665 Ordering::Less => {
666 self.rec_iter.next();
667 }
668 Ordering::Greater => {
669 self.mate_iter.next();
670 }
671 Ordering::Equal => {
672 let rec = self.rec_iter.next().expect("peeked Some");
673 let mate = self.mate_iter.next().expect("peeked Some");
674
675 return Some(ReadMateAndRefPos {
676 read_offset: rec.read_offset,
677 mate_offset: mate.read_offset,
678 });
679 }
680 }
681 }
682 }
683}
684
685pub fn apply_overlapping_consensus(
703 reads: &mut [RecordBuf],
704 caller: &mut OverlappingBasesConsensusCaller,
705) -> Result<()> {
706 use ahash::AHashMap;
707
708 let mut read_pairs: AHashMap<String, (Option<usize>, Option<usize>)> = AHashMap::new();
710
711 for (idx, record) in reads.iter().enumerate() {
712 let read_name = record.name().map(std::string::ToString::to_string).unwrap_or_default();
713
714 if record.flags().is_first_segment() {
715 read_pairs.entry(read_name).or_insert((None, None)).0 = Some(idx);
716 } else if record.flags().is_last_segment() {
717 read_pairs.entry(read_name).or_insert((None, None)).1 = Some(idx);
718 }
719 }
721
722 for (r1_idx, r2_idx) in read_pairs.values() {
724 if let (Some(idx1), Some(idx2)) = (r1_idx, r2_idx) {
725 let (r1, r2) = if idx1 < idx2 {
727 let (left, right) = reads.split_at_mut(*idx2);
728 (&mut left[*idx1], &mut right[0])
729 } else {
730 let (left, right) = reads.split_at_mut(*idx1);
731 (&mut right[0], &mut left[*idx2])
732 };
733
734 caller.call(r1, r2).context("Failed to call overlapping consensus")?;
735 }
736 }
737
738 Ok(())
739}
740
741impl OverlappingBasesConsensusCaller {
746 pub fn call_raw(&mut self, r1: &mut [u8], r2: &mut [u8]) -> Result<bool> {
758 if fgumi_raw_bam::flags(r1) & fgumi_raw_bam::flags::UNMAPPED != 0
760 || fgumi_raw_bam::flags(r2) & fgumi_raw_bam::flags::UNMAPPED != 0
761 {
762 return Ok(false);
763 }
764
765 if fgumi_raw_bam::ref_id(r1) != fgumi_raw_bam::ref_id(r2) {
767 return Ok(false);
768 }
769
770 let Some(r1_start) = fgumi_raw_bam::alignment_start_from_raw(r1) else {
772 return Ok(false);
773 };
774 let Some(r1_end) = fgumi_raw_bam::alignment_end_from_raw(r1) else { return Ok(false) };
775 let Some(r2_start) = fgumi_raw_bam::alignment_start_from_raw(r2) else {
776 return Ok(false);
777 };
778 let Some(r2_end) = fgumi_raw_bam::alignment_end_from_raw(r2) else { return Ok(false) };
779
780 let overlap_iter =
782 ReadMateAndRefPosIterator::new_raw(r1, r2, r1_start, r1_end, r2_start, r2_end);
783
784 let overlapping_positions: Vec<_> = overlap_iter.collect();
785
786 if overlapping_positions.is_empty() {
787 return Ok(false);
788 }
789
790 let mut r1_seq = fgumi_raw_bam::extract_sequence(r1);
792 let mut r2_seq = fgumi_raw_bam::extract_sequence(r2);
793 let mut r1_quals: Vec<u8> = fgumi_raw_bam::quality_scores_slice(r1).to_vec();
794 let mut r2_quals: Vec<u8> = fgumi_raw_bam::quality_scores_slice(r2).to_vec();
795 let mut modified = false;
796
797 for pos in overlapping_positions {
798 let r1_base = r1_seq[pos.read_offset];
799 let r2_base = r2_seq[pos.mate_offset];
800
801 if is_no_call(r1_base) || is_no_call(r2_base) {
802 continue;
803 }
804
805 self.stats.overlapping_bases += 1;
806
807 let r1_qual = r1_quals[pos.read_offset];
808 let r2_qual = r2_quals[pos.mate_offset];
809
810 if r1_base == r2_base {
811 self.stats.bases_agreeing += 1;
812 if self.process_agreement(
813 pos.read_offset,
814 pos.mate_offset,
815 r1_qual,
816 r2_qual,
817 &mut r1_quals,
818 &mut r2_quals,
819 ) {
820 modified = true;
821 }
822 } else {
823 self.stats.bases_disagreeing += 1;
824 self.process_disagreement(
825 pos.read_offset,
826 pos.mate_offset,
827 r1_base,
828 r2_base,
829 r1_qual,
830 r2_qual,
831 &mut r1_seq,
832 &mut r2_seq,
833 &mut r1_quals,
834 &mut r2_quals,
835 );
836 modified = true;
837 }
838 }
839
840 if modified {
842 let r1_seq_off = fgumi_raw_bam::seq_offset(r1);
843 let r2_seq_off = fgumi_raw_bam::seq_offset(r2);
844 for (i, &base) in r1_seq.iter().enumerate() {
845 fgumi_raw_bam::set_base(r1, r1_seq_off, i, base);
846 }
847 for (i, &base) in r2_seq.iter().enumerate() {
848 fgumi_raw_bam::set_base(r2, r2_seq_off, i, base);
849 }
850 let r1_qual_off = fgumi_raw_bam::qual_offset(r1);
851 let r2_qual_off = fgumi_raw_bam::qual_offset(r2);
852 r1[r1_qual_off..r1_qual_off + r1_quals.len()].copy_from_slice(&r1_quals);
853 r2[r2_qual_off..r2_qual_off + r2_quals.len()].copy_from_slice(&r2_quals);
854 }
855
856 Ok(true)
857 }
858}
859
860pub fn apply_overlapping_consensus_raw(
868 records: &mut [Vec<u8>],
869 caller: &mut OverlappingBasesConsensusCaller,
870) -> Result<()> {
871 use ahash::AHashMap;
872
873 let mut read_pairs: AHashMap<Vec<u8>, (Option<usize>, Option<usize>)> = AHashMap::new();
875
876 for (idx, record) in records.iter().enumerate() {
877 let name = fgumi_raw_bam::read_name(record).to_vec();
878 let flg = fgumi_raw_bam::flags(record);
879
880 if flg & fgumi_raw_bam::flags::FIRST_SEGMENT != 0 {
881 read_pairs.entry(name).or_insert((None, None)).0 = Some(idx);
882 } else if flg & fgumi_raw_bam::flags::LAST_SEGMENT != 0 {
883 read_pairs.entry(name).or_insert((None, None)).1 = Some(idx);
884 }
885 }
886
887 for (r1_idx, r2_idx) in read_pairs.values() {
889 if let (Some(idx1), Some(idx2)) = (r1_idx, r2_idx) {
890 let (r1, r2) = if idx1 < idx2 {
891 let (left, right) = records.split_at_mut(*idx2);
892 (&mut left[*idx1], &mut right[0])
893 } else {
894 let (left, right) = records.split_at_mut(*idx1);
895 (&mut right[0], &mut left[*idx2])
896 };
897
898 caller.call_raw(r1, r2).context("Failed to call overlapping consensus on raw bytes")?;
899 }
900 }
901
902 Ok(())
903}
904
905#[cfg(test)]
906#[expect(
907 clippy::cast_possible_truncation,
908 clippy::cast_possible_wrap,
909 clippy::match_same_arms,
910 reason = "test code uses integer casts for position arithmetic"
911)]
912mod tests {
913 use super::*;
914 use fgumi_sam::builder::RecordBuilder;
915 use noodles::sam::alignment::record::Flags;
916
917 fn create_test_record(seq: &[u8], qual: &[u8], start: usize, cigar: &str) -> RecordBuf {
919 RecordBuilder::mapped_read()
920 .sequence(&String::from_utf8_lossy(seq))
921 .qualities(qual)
922 .alignment_start(start)
923 .cigar(cigar)
924 .build()
925 }
926
927 #[test]
928 fn test_agreement_strategy_consensus() {
929 let mut caller = OverlappingBasesConsensusCaller::new(
930 AgreementStrategy::Consensus,
931 DisagreementStrategy::Consensus,
932 );
933
934 assert_eq!(caller.agreement_strategy, AgreementStrategy::Consensus);
935
936 let mut r1 = create_test_record(b"ACGT", &[30, 30, 30, 30], 100, "4M");
938 let mut r2 = create_test_record(b"ACGT", &[20, 20, 20, 20], 100, "4M");
939
940 let result = caller.call(&mut r1, &mut r2).unwrap();
941 assert!(result);
942
943 assert_eq!(r1.quality_scores().as_ref()[0], 50);
945 assert_eq!(r2.quality_scores().as_ref()[0], 50);
946 }
947
948 #[test]
949 fn test_agreement_strategy_max_qual() {
950 let mut caller = OverlappingBasesConsensusCaller::new(
951 AgreementStrategy::MaxQual,
952 DisagreementStrategy::Consensus,
953 );
954
955 let mut r1 = create_test_record(b"ACGT", &[30, 30, 30, 30], 100, "4M");
956 let mut r2 = create_test_record(b"ACGT", &[20, 20, 20, 20], 100, "4M");
957
958 let result = caller.call(&mut r1, &mut r2).unwrap();
959 assert!(result);
960
961 assert_eq!(r1.quality_scores().as_ref()[0], 30);
963 assert_eq!(r2.quality_scores().as_ref()[0], 30);
964 }
965
966 #[test]
967 fn test_agreement_strategy_pass_through() {
968 let mut caller = OverlappingBasesConsensusCaller::new(
969 AgreementStrategy::PassThrough,
970 DisagreementStrategy::Consensus,
971 );
972
973 let mut r1 = create_test_record(b"ACGT", &[30, 30, 30, 30], 100, "4M");
974 let mut r2 = create_test_record(b"ACGT", &[20, 20, 20, 20], 100, "4M");
975
976 let result = caller.call(&mut r1, &mut r2).unwrap();
977 assert!(result);
978
979 assert_eq!(r1.quality_scores().as_ref()[0], 30);
981 assert_eq!(r2.quality_scores().as_ref()[0], 20);
982 }
983
984 #[test]
985 fn test_disagreement_strategy_consensus() {
986 let mut caller = OverlappingBasesConsensusCaller::new(
987 AgreementStrategy::PassThrough,
988 DisagreementStrategy::Consensus,
989 );
990
991 let mut r1 = create_test_record(b"ACGT", &[30, 30, 30, 30], 100, "4M");
992 let mut r2 = create_test_record(b"GCTA", &[20, 20, 20, 20], 100, "4M");
993
994 let result = caller.call(&mut r1, &mut r2).unwrap();
995 assert!(result);
996
997 assert_eq!(r1.sequence().as_ref()[0], b'A');
999 assert_eq!(r2.sequence().as_ref()[0], b'A');
1000 assert_eq!(r1.quality_scores().as_ref()[0], 10);
1001 assert_eq!(r2.quality_scores().as_ref()[0], 10);
1002 }
1003
1004 #[test]
1005 fn test_disagreement_strategy_mask_both() {
1006 let mut caller = OverlappingBasesConsensusCaller::new(
1007 AgreementStrategy::PassThrough,
1008 DisagreementStrategy::MaskBoth,
1009 );
1010
1011 let mut r1 = create_test_record(b"ACGT", &[30, 30, 30, 30], 100, "4M");
1012 let mut r2 = create_test_record(b"GCTA", &[20, 20, 20, 20], 100, "4M");
1013
1014 let result = caller.call(&mut r1, &mut r2).unwrap();
1015 assert!(result);
1016
1017 assert_eq!(r1.sequence().as_ref()[0], b'N');
1019 assert_eq!(r2.sequence().as_ref()[0], b'N');
1020 assert_eq!(r1.quality_scores().as_ref()[0], 2);
1021 assert_eq!(r2.quality_scores().as_ref()[0], 2);
1022 }
1023
1024 #[test]
1025 fn test_disagreement_strategy_mask_lower_qual() {
1026 let mut caller = OverlappingBasesConsensusCaller::new(
1027 AgreementStrategy::PassThrough,
1028 DisagreementStrategy::MaskLowerQual,
1029 );
1030
1031 let mut r1 = create_test_record(b"ACGT", &[30, 30, 30, 30], 100, "4M");
1032 let mut r2 = create_test_record(b"GCTA", &[20, 20, 20, 20], 100, "4M");
1033
1034 let result = caller.call(&mut r1, &mut r2).unwrap();
1035 assert!(result);
1036
1037 assert_eq!(r1.sequence().as_ref()[0], b'A'); assert_eq!(r2.sequence().as_ref()[0], b'N'); assert_eq!(r1.quality_scores().as_ref()[0], 30); assert_eq!(r2.quality_scores().as_ref()[0], 2); }
1043
1044 #[test]
1045 fn test_disagreement_mask_lower_qual_r1_lower() {
1046 let mut caller = OverlappingBasesConsensusCaller::new(
1047 AgreementStrategy::PassThrough,
1048 DisagreementStrategy::MaskLowerQual,
1049 );
1050
1051 let mut r1 = create_test_record(b"ACGT", &[20, 20, 20, 20], 100, "4M");
1052 let mut r2 = create_test_record(b"GCTA", &[30, 30, 30, 30], 100, "4M");
1053
1054 let result = caller.call(&mut r1, &mut r2).unwrap();
1055 assert!(result);
1056
1057 assert_eq!(r1.sequence().as_ref()[0], b'N'); assert_eq!(r2.sequence().as_ref()[0], b'G'); }
1061
1062 #[test]
1063 fn test_disagreement_mask_lower_qual_equal_quality() {
1064 let mut caller = OverlappingBasesConsensusCaller::new(
1065 AgreementStrategy::PassThrough,
1066 DisagreementStrategy::MaskLowerQual,
1067 );
1068
1069 let mut r1 = create_test_record(b"ACGT", &[30, 30, 30, 30], 100, "4M");
1070 let mut r2 = create_test_record(b"GCTA", &[30, 30, 30, 30], 100, "4M");
1071
1072 let result = caller.call(&mut r1, &mut r2).unwrap();
1073 assert!(result);
1074
1075 assert_eq!(r1.sequence().as_ref()[0], b'N');
1077 assert_eq!(r2.sequence().as_ref()[0], b'N');
1078 assert_eq!(r1.quality_scores().as_ref()[0], 2);
1079 assert_eq!(r2.quality_scores().as_ref()[0], 2);
1080 }
1081
1082 #[test]
1083 fn test_disagreement_consensus_equal_quality() {
1084 let mut caller = OverlappingBasesConsensusCaller::new(
1085 AgreementStrategy::PassThrough,
1086 DisagreementStrategy::Consensus,
1087 );
1088
1089 let mut r1 = create_test_record(b"ACGT", &[30, 30, 30, 30], 100, "4M");
1090 let mut r2 = create_test_record(b"GCTA", &[30, 30, 30, 30], 100, "4M");
1091
1092 let result = caller.call(&mut r1, &mut r2).unwrap();
1093 assert!(result);
1094
1095 assert_eq!(r1.sequence().as_ref()[0], b'N');
1097 assert_eq!(r2.sequence().as_ref()[0], b'N');
1098 assert_eq!(r1.quality_scores().as_ref()[0], 2);
1099 assert_eq!(r2.quality_scores().as_ref()[0], 2);
1100 }
1101
1102 #[test]
1103 fn test_no_overlap() {
1104 let mut caller = OverlappingBasesConsensusCaller::new(
1105 AgreementStrategy::Consensus,
1106 DisagreementStrategy::Consensus,
1107 );
1108
1109 let mut r1 = create_test_record(b"ACGT", &[30, 30, 30, 30], 100, "4M");
1111 let mut r2 = create_test_record(b"ACGT", &[20, 20, 20, 20], 200, "4M");
1112
1113 let result = caller.call(&mut r1, &mut r2).unwrap();
1114 assert!(!result); }
1116
1117 #[test]
1118 fn test_unmapped_reads() {
1119 let mut caller = OverlappingBasesConsensusCaller::new(
1120 AgreementStrategy::Consensus,
1121 DisagreementStrategy::Consensus,
1122 );
1123
1124 let mut r1 = create_test_record(b"ACGT", &[30, 30, 30, 30], 100, "4M");
1125 let mut r2 = create_test_record(b"ACGT", &[20, 20, 20, 20], 100, "4M");
1126 *r1.flags_mut() = Flags::UNMAPPED;
1127
1128 let result = caller.call(&mut r1, &mut r2).unwrap();
1129 assert!(!result); }
1131
1132 #[test]
1133 fn test_quality_capping_at_93() {
1134 let mut caller = OverlappingBasesConsensusCaller::new(
1135 AgreementStrategy::Consensus,
1136 DisagreementStrategy::Consensus,
1137 );
1138
1139 let mut r1 = create_test_record(b"ACGT", &[50, 50, 50, 50], 100, "4M");
1141 let mut r2 = create_test_record(b"ACGT", &[50, 50, 50, 50], 100, "4M");
1142
1143 let result = caller.call(&mut r1, &mut r2).unwrap();
1144 assert!(result);
1145
1146 assert_eq!(r1.quality_scores().as_ref()[0], 93);
1148 }
1149
1150 #[test]
1151 fn test_stats_tracking() {
1152 let mut caller = OverlappingBasesConsensusCaller::new(
1153 AgreementStrategy::Consensus,
1154 DisagreementStrategy::Consensus,
1155 );
1156
1157 let mut r1 = create_test_record(b"ACGT", &[30, 30, 30, 30], 100, "4M");
1159 let mut r2 = create_test_record(b"ACGT", &[20, 20, 20, 20], 100, "4M");
1160
1161 let result = caller.call(&mut r1, &mut r2).unwrap();
1162 assert!(result);
1163
1164 assert!(caller.stats().overlapping_bases > 0);
1169 assert_eq!(caller.stats().bases_agreeing, caller.stats().overlapping_bases);
1170 assert_eq!(caller.stats().bases_disagreeing, 0);
1171 assert_eq!(caller.stats().bases_corrected, caller.stats().overlapping_bases);
1173 }
1174
1175 #[test]
1176 fn test_stats_tracking_with_disagreements() {
1177 let mut caller = OverlappingBasesConsensusCaller::new(
1178 AgreementStrategy::PassThrough,
1179 DisagreementStrategy::Consensus,
1180 );
1181
1182 let mut r1 = create_test_record(b"ACGT", &[30, 30, 30, 30], 100, "4M");
1185 let mut r2 = create_test_record(b"TGCA", &[20, 20, 20, 20], 100, "4M");
1186
1187 let result = caller.call(&mut r1, &mut r2).unwrap();
1188 assert!(result);
1189
1190 assert!(caller.stats().overlapping_bases > 0);
1192 assert_eq!(caller.stats().bases_agreeing, 0);
1193 assert_eq!(caller.stats().bases_disagreeing, caller.stats().overlapping_bases);
1194 }
1195
1196 #[test]
1197 fn test_stats_reset() {
1198 let mut caller = OverlappingBasesConsensusCaller::new(
1199 AgreementStrategy::Consensus,
1200 DisagreementStrategy::Consensus,
1201 );
1202
1203 let mut r1 = create_test_record(b"ACGT", &[30, 30, 30, 30], 100, "4M");
1204 let mut r2 = create_test_record(b"ACGT", &[20, 20, 20, 20], 100, "4M");
1205
1206 caller.call(&mut r1, &mut r2).unwrap();
1207 assert!(caller.stats().overlapping_bases > 0);
1208
1209 caller.reset_stats();
1210 assert_eq!(caller.stats().overlapping_bases, 0);
1211 assert_eq!(caller.stats().bases_agreeing, 0);
1212 assert_eq!(caller.stats().bases_disagreeing, 0);
1213 assert_eq!(caller.stats().bases_corrected, 0);
1214 }
1215
1216 #[test]
1217 fn test_overlap_different_start_positions() {
1218 let mut caller = OverlappingBasesConsensusCaller::new(
1220 AgreementStrategy::Consensus,
1221 DisagreementStrategy::Consensus,
1222 );
1223
1224 let mut r1 = create_test_record(b"ACGT", &[30, 30, 30, 30], 100, "4M");
1228 let mut r2 = create_test_record(b"GTAC", &[20, 20, 20, 20], 102, "4M");
1229
1230 let result = caller.call(&mut r1, &mut r2).unwrap();
1231
1232 assert!(result);
1234 assert_eq!(caller.stats().overlapping_bases, 2);
1235 assert_eq!(caller.stats().bases_agreeing, 2); assert_eq!(r1.quality_scores().as_ref()[2], 50);
1241 assert_eq!(r1.quality_scores().as_ref()[3], 50);
1242 }
1243
1244 #[test]
1245 fn test_full_overlap_same_position() {
1246 let mut caller = OverlappingBasesConsensusCaller::new(
1247 AgreementStrategy::Consensus,
1248 DisagreementStrategy::Consensus,
1249 );
1250
1251 let mut r1 = create_test_record(b"ACGT", &[30, 30, 30, 30], 100, "4M");
1253 let mut r2 = create_test_record(b"ACGT", &[20, 20, 20, 20], 100, "4M");
1254
1255 let result = caller.call(&mut r1, &mut r2).unwrap();
1256 assert!(result);
1257
1258 assert!(caller.stats().overlapping_bases > 0);
1260 assert_eq!(caller.stats().bases_agreeing, caller.stats().overlapping_bases);
1261 }
1262
1263 #[test]
1264 fn test_correction_stats() {
1265 let mut stats = CorrectionStats::new();
1266 assert_eq!(stats.overlapping_bases, 0);
1267 assert_eq!(stats.bases_agreeing, 0);
1268 assert_eq!(stats.bases_disagreeing, 0);
1269 assert_eq!(stats.bases_corrected, 0);
1270
1271 stats.overlapping_bases = 10;
1272 stats.bases_agreeing = 8;
1273 stats.bases_disagreeing = 2;
1274 stats.bases_corrected = 3;
1275
1276 stats.reset();
1277 assert_eq!(stats.overlapping_bases, 0);
1278 }
1279
1280 #[test]
1281 fn test_cigar_with_insertions() {
1282 let mut caller = OverlappingBasesConsensusCaller::new(
1283 AgreementStrategy::Consensus,
1284 DisagreementStrategy::Consensus,
1285 );
1286
1287 let mut r1 = create_test_record(b"ACTTGG", &[30, 30, 30, 30, 30, 30], 100, "2M2I2M");
1289 let mut r2 = create_test_record(b"ACGG", &[20, 20, 20, 20], 100, "4M");
1290
1291 let result = caller.call(&mut r1, &mut r2).unwrap();
1292 assert!(result);
1294 }
1295
1296 #[test]
1297 fn test_cigar_with_deletions() {
1298 let mut caller = OverlappingBasesConsensusCaller::new(
1299 AgreementStrategy::Consensus,
1300 DisagreementStrategy::Consensus,
1301 );
1302
1303 let mut r1 = create_test_record(b"ACGG", &[30, 30, 30, 30], 100, "2M2D2M");
1305 let mut r2 = create_test_record(b"ACTTGG", &[20, 20, 20, 20, 20, 20], 100, "6M");
1306
1307 let result = caller.call(&mut r1, &mut r2).unwrap();
1308 assert!(result);
1310 }
1311
1312 #[test]
1313 fn test_cigar_with_soft_clips_different_structure() {
1314 let mut caller = OverlappingBasesConsensusCaller::new(
1317 AgreementStrategy::Consensus,
1318 DisagreementStrategy::Consensus,
1319 );
1320
1321 let mut r1 = create_test_record(b"NNACGT", &[2, 2, 30, 30, 30, 30], 100, "2S4M");
1325 let mut r2 = create_test_record(b"ACGT", &[20, 20, 20, 20], 100, "4M");
1326
1327 let result = caller.call(&mut r1, &mut r2).unwrap();
1328
1329 assert!(result);
1334 assert_eq!(caller.stats().overlapping_bases, 4);
1335 assert_eq!(caller.stats().bases_agreeing, 4); assert_eq!(r1.quality_scores().as_ref()[2], 50); assert_eq!(r1.quality_scores().as_ref()[3], 50);
1340 assert_eq!(r1.quality_scores().as_ref()[4], 50);
1341 assert_eq!(r1.quality_scores().as_ref()[5], 50);
1342 }
1343
1344 #[test]
1345 fn test_cigar_soft_clips_both_reads_different_lengths() {
1346 let mut caller = OverlappingBasesConsensusCaller::new(
1348 AgreementStrategy::Consensus,
1349 DisagreementStrategy::Consensus,
1350 );
1351
1352 let mut r1 = create_test_record(b"NNNACG", &[2, 2, 2, 30, 30, 30], 100, "3S3M");
1355 let mut r2 = create_test_record(b"NACG", &[2, 20, 20, 20], 100, "1S3M");
1356
1357 let result = caller.call(&mut r1, &mut r2).unwrap();
1358 assert!(result);
1359
1360 assert_eq!(caller.stats().overlapping_bases, 3);
1364 assert_eq!(caller.stats().bases_agreeing, 3); }
1366
1367 #[test]
1368 fn test_cigar_soft_clips_both_reads_same_structure() {
1369 let mut caller = OverlappingBasesConsensusCaller::new(
1371 AgreementStrategy::Consensus,
1372 DisagreementStrategy::Consensus,
1373 );
1374
1375 let mut r1 = create_test_record(b"NACG", &[2, 30, 30, 30], 100, "1S3M");
1377 let mut r2 = create_test_record(b"NACG", &[2, 20, 20, 20], 100, "1S3M");
1378
1379 let result = caller.call(&mut r1, &mut r2).unwrap();
1380 assert!(result);
1381
1382 assert_eq!(caller.stats().overlapping_bases, 3);
1384 assert_eq!(caller.stats().bases_agreeing, 3);
1385 }
1386
1387 #[test]
1388 fn test_disagreement_consensus_min_quality() {
1389 let mut caller = OverlappingBasesConsensusCaller::new(
1390 AgreementStrategy::PassThrough,
1391 DisagreementStrategy::Consensus,
1392 );
1393
1394 let mut r1 = create_test_record(b"ACGT", &[30, 30, 30, 30], 100, "4M");
1396 let mut r2 = create_test_record(b"GCTA", &[29, 29, 29, 29], 100, "4M");
1397
1398 let result = caller.call(&mut r1, &mut r2).unwrap();
1399 assert!(result);
1400
1401 assert_eq!(r1.quality_scores().as_ref()[0], 2);
1403 assert_eq!(r2.quality_scores().as_ref()[0], 2);
1404 }
1405
1406 fn base_to_4bit(b: u8) -> u8 {
1412 match b {
1413 b'A' | b'a' => 1,
1414 b'C' | b'c' => 2,
1415 b'G' | b'g' => 4,
1416 b'T' | b't' => 8,
1417 b'N' | b'n' => 15,
1418 _ => 15,
1419 }
1420 }
1421
1422 fn pack_seq(bases: &[u8]) -> Vec<u8> {
1424 let mut packed = Vec::with_capacity(bases.len().div_ceil(2));
1425 for pair in bases.chunks(2) {
1426 let hi = base_to_4bit(pair[0]);
1427 let lo = if pair.len() > 1 { base_to_4bit(pair[1]) } else { 0 };
1428 packed.push((hi << 4) | lo);
1429 }
1430 packed
1431 }
1432
1433 fn make_raw_bam(
1440 name: &[u8],
1441 flag: u16,
1442 tid: i32,
1443 pos_0based: i32,
1444 cigar_ops: &[u32],
1445 seq: &[u8],
1446 qual: &[u8],
1447 ) -> Vec<u8> {
1448 let l_read_name = (name.len() + 1) as u8; let n_cigar_op = cigar_ops.len() as u16;
1450 let seq_len = seq.len();
1451 let packed_seq = pack_seq(seq);
1452 let total = 32 + l_read_name as usize + cigar_ops.len() * 4 + packed_seq.len() + seq_len;
1453 let mut buf = vec![0u8; total];
1454
1455 buf[0..4].copy_from_slice(&tid.to_le_bytes());
1457 buf[4..8].copy_from_slice(&pos_0based.to_le_bytes());
1458 buf[8] = l_read_name;
1459 buf[9] = 60; buf[10..12].copy_from_slice(&0u16.to_le_bytes()); buf[12..14].copy_from_slice(&n_cigar_op.to_le_bytes());
1462 buf[14..16].copy_from_slice(&flag.to_le_bytes());
1463 buf[16..20].copy_from_slice(&(seq_len as u32).to_le_bytes());
1464 buf[20..24].copy_from_slice(&(-1i32).to_le_bytes()); buf[24..28].copy_from_slice(&(-1i32).to_le_bytes()); buf[28..32].copy_from_slice(&0i32.to_le_bytes()); let name_start = 32;
1470 buf[name_start..name_start + name.len()].copy_from_slice(name);
1471 buf[name_start + name.len()] = 0;
1472
1473 let cigar_start = name_start + l_read_name as usize;
1475 for (i, &op) in cigar_ops.iter().enumerate() {
1476 let off = cigar_start + i * 4;
1477 buf[off..off + 4].copy_from_slice(&op.to_le_bytes());
1478 }
1479
1480 let seq_start = cigar_start + cigar_ops.len() * 4;
1482 buf[seq_start..seq_start + packed_seq.len()].copy_from_slice(&packed_seq);
1483
1484 let qual_start = seq_start + packed_seq.len();
1486 buf[qual_start..qual_start + qual.len()].copy_from_slice(qual);
1487
1488 buf
1489 }
1490
1491 fn cigar_op(len: usize, op_type: u8) -> u32 {
1494 ((len as u32) << 4) | u32::from(op_type)
1495 }
1496
1497 fn create_raw_test_record(
1501 seq: &[u8],
1502 qual: &[u8],
1503 start_1based: usize,
1504 cigar: &[u32],
1505 ) -> Vec<u8> {
1506 let pos_0based = (start_1based as i32) - 1;
1507 make_raw_bam(b"rea", 0, 0, pos_0based, cigar, seq, qual)
1509 }
1510
1511 #[test]
1512 fn test_raw_agreement_strategy_consensus() {
1513 let mut caller = OverlappingBasesConsensusCaller::new(
1514 AgreementStrategy::Consensus,
1515 DisagreementStrategy::Consensus,
1516 );
1517
1518 let cigar = [cigar_op(4, 0)]; let mut r1 = create_raw_test_record(b"ACGT", &[30, 30, 30, 30], 100, &cigar);
1520 let mut r2 = create_raw_test_record(b"ACGT", &[20, 20, 20, 20], 100, &cigar);
1521
1522 let result = caller.call_raw(&mut r1, &mut r2).unwrap();
1523 assert!(result);
1524
1525 assert_eq!(fgumi_raw_bam::quality_scores_slice(&r1)[0], 50);
1527 assert_eq!(fgumi_raw_bam::quality_scores_slice(&r2)[0], 50);
1528 }
1529
1530 #[test]
1531 fn test_raw_agreement_strategy_max_qual() {
1532 let mut caller = OverlappingBasesConsensusCaller::new(
1533 AgreementStrategy::MaxQual,
1534 DisagreementStrategy::Consensus,
1535 );
1536
1537 let cigar = [cigar_op(4, 0)]; let mut r1 = create_raw_test_record(b"ACGT", &[30, 30, 30, 30], 100, &cigar);
1539 let mut r2 = create_raw_test_record(b"ACGT", &[20, 20, 20, 20], 100, &cigar);
1540
1541 let result = caller.call_raw(&mut r1, &mut r2).unwrap();
1542 assert!(result);
1543
1544 assert_eq!(fgumi_raw_bam::quality_scores_slice(&r1)[0], 30);
1546 assert_eq!(fgumi_raw_bam::quality_scores_slice(&r2)[0], 30);
1547 }
1548
1549 #[test]
1550 fn test_raw_agreement_strategy_pass_through() {
1551 let mut caller = OverlappingBasesConsensusCaller::new(
1552 AgreementStrategy::PassThrough,
1553 DisagreementStrategy::Consensus,
1554 );
1555
1556 let cigar = [cigar_op(4, 0)]; let mut r1 = create_raw_test_record(b"ACGT", &[30, 30, 30, 30], 100, &cigar);
1558 let mut r2 = create_raw_test_record(b"ACGT", &[20, 20, 20, 20], 100, &cigar);
1559
1560 let result = caller.call_raw(&mut r1, &mut r2).unwrap();
1561 assert!(result);
1562
1563 assert_eq!(fgumi_raw_bam::quality_scores_slice(&r1)[0], 30);
1565 assert_eq!(fgumi_raw_bam::quality_scores_slice(&r2)[0], 20);
1566 }
1567
1568 #[test]
1569 fn test_raw_disagreement_strategy_consensus() {
1570 let mut caller = OverlappingBasesConsensusCaller::new(
1571 AgreementStrategy::PassThrough,
1572 DisagreementStrategy::Consensus,
1573 );
1574
1575 let cigar = [cigar_op(4, 0)]; let mut r1 = create_raw_test_record(b"ACGT", &[30, 30, 30, 30], 100, &cigar);
1577 let mut r2 = create_raw_test_record(b"GCTA", &[20, 20, 20, 20], 100, &cigar);
1578
1579 let result = caller.call_raw(&mut r1, &mut r2).unwrap();
1580 assert!(result);
1581
1582 let r1_seq = fgumi_raw_bam::extract_sequence(&r1);
1584 let r2_seq = fgumi_raw_bam::extract_sequence(&r2);
1585 assert_eq!(r1_seq[0], b'A');
1586 assert_eq!(r2_seq[0], b'A');
1587 assert_eq!(fgumi_raw_bam::quality_scores_slice(&r1)[0], 10);
1588 assert_eq!(fgumi_raw_bam::quality_scores_slice(&r2)[0], 10);
1589 }
1590
1591 #[test]
1592 fn test_raw_disagreement_strategy_mask_both() {
1593 let mut caller = OverlappingBasesConsensusCaller::new(
1594 AgreementStrategy::PassThrough,
1595 DisagreementStrategy::MaskBoth,
1596 );
1597
1598 let cigar = [cigar_op(4, 0)]; let mut r1 = create_raw_test_record(b"ACGT", &[30, 30, 30, 30], 100, &cigar);
1600 let mut r2 = create_raw_test_record(b"GCTA", &[20, 20, 20, 20], 100, &cigar);
1601
1602 let result = caller.call_raw(&mut r1, &mut r2).unwrap();
1603 assert!(result);
1604
1605 let r1_seq = fgumi_raw_bam::extract_sequence(&r1);
1607 let r2_seq = fgumi_raw_bam::extract_sequence(&r2);
1608 assert_eq!(r1_seq[0], b'N');
1609 assert_eq!(r2_seq[0], b'N');
1610 assert_eq!(fgumi_raw_bam::quality_scores_slice(&r1)[0], 2);
1611 assert_eq!(fgumi_raw_bam::quality_scores_slice(&r2)[0], 2);
1612 }
1613
1614 #[test]
1615 fn test_raw_disagreement_strategy_mask_lower_qual() {
1616 let mut caller = OverlappingBasesConsensusCaller::new(
1617 AgreementStrategy::PassThrough,
1618 DisagreementStrategy::MaskLowerQual,
1619 );
1620
1621 let cigar = [cigar_op(4, 0)]; let mut r1 = create_raw_test_record(b"ACGT", &[30, 30, 30, 30], 100, &cigar);
1623 let mut r2 = create_raw_test_record(b"GCTA", &[20, 20, 20, 20], 100, &cigar);
1624
1625 let result = caller.call_raw(&mut r1, &mut r2).unwrap();
1626 assert!(result);
1627
1628 let r1_seq = fgumi_raw_bam::extract_sequence(&r1);
1630 let r2_seq = fgumi_raw_bam::extract_sequence(&r2);
1631 assert_eq!(r1_seq[0], b'A'); assert_eq!(r2_seq[0], b'N'); assert_eq!(fgumi_raw_bam::quality_scores_slice(&r1)[0], 30); assert_eq!(fgumi_raw_bam::quality_scores_slice(&r2)[0], 2); }
1636
1637 #[test]
1638 fn test_raw_disagreement_mask_lower_qual_r1_lower() {
1639 let mut caller = OverlappingBasesConsensusCaller::new(
1640 AgreementStrategy::PassThrough,
1641 DisagreementStrategy::MaskLowerQual,
1642 );
1643
1644 let cigar = [cigar_op(4, 0)]; let mut r1 = create_raw_test_record(b"ACGT", &[20, 20, 20, 20], 100, &cigar);
1646 let mut r2 = create_raw_test_record(b"GCTA", &[30, 30, 30, 30], 100, &cigar);
1647
1648 let result = caller.call_raw(&mut r1, &mut r2).unwrap();
1649 assert!(result);
1650
1651 let r1_seq = fgumi_raw_bam::extract_sequence(&r1);
1653 let r2_seq = fgumi_raw_bam::extract_sequence(&r2);
1654 assert_eq!(r1_seq[0], b'N'); assert_eq!(r2_seq[0], b'G'); }
1657
1658 #[test]
1659 fn test_raw_disagreement_mask_lower_qual_equal_quality() {
1660 let mut caller = OverlappingBasesConsensusCaller::new(
1661 AgreementStrategy::PassThrough,
1662 DisagreementStrategy::MaskLowerQual,
1663 );
1664
1665 let cigar = [cigar_op(4, 0)]; let mut r1 = create_raw_test_record(b"ACGT", &[30, 30, 30, 30], 100, &cigar);
1667 let mut r2 = create_raw_test_record(b"GCTA", &[30, 30, 30, 30], 100, &cigar);
1668
1669 let result = caller.call_raw(&mut r1, &mut r2).unwrap();
1670 assert!(result);
1671
1672 let r1_seq = fgumi_raw_bam::extract_sequence(&r1);
1674 let r2_seq = fgumi_raw_bam::extract_sequence(&r2);
1675 assert_eq!(r1_seq[0], b'N');
1676 assert_eq!(r2_seq[0], b'N');
1677 assert_eq!(fgumi_raw_bam::quality_scores_slice(&r1)[0], 2);
1678 assert_eq!(fgumi_raw_bam::quality_scores_slice(&r2)[0], 2);
1679 }
1680
1681 #[test]
1682 fn test_raw_disagreement_consensus_equal_quality() {
1683 let mut caller = OverlappingBasesConsensusCaller::new(
1684 AgreementStrategy::PassThrough,
1685 DisagreementStrategy::Consensus,
1686 );
1687
1688 let cigar = [cigar_op(4, 0)]; let mut r1 = create_raw_test_record(b"ACGT", &[30, 30, 30, 30], 100, &cigar);
1690 let mut r2 = create_raw_test_record(b"GCTA", &[30, 30, 30, 30], 100, &cigar);
1691
1692 let result = caller.call_raw(&mut r1, &mut r2).unwrap();
1693 assert!(result);
1694
1695 let r1_seq = fgumi_raw_bam::extract_sequence(&r1);
1697 let r2_seq = fgumi_raw_bam::extract_sequence(&r2);
1698 assert_eq!(r1_seq[0], b'N');
1699 assert_eq!(r2_seq[0], b'N');
1700 assert_eq!(fgumi_raw_bam::quality_scores_slice(&r1)[0], 2);
1701 assert_eq!(fgumi_raw_bam::quality_scores_slice(&r2)[0], 2);
1702 }
1703
1704 #[test]
1705 fn test_raw_no_overlap() {
1706 let mut caller = OverlappingBasesConsensusCaller::new(
1707 AgreementStrategy::Consensus,
1708 DisagreementStrategy::Consensus,
1709 );
1710
1711 let cigar = [cigar_op(4, 0)]; let mut r1 = create_raw_test_record(b"ACGT", &[30, 30, 30, 30], 100, &cigar);
1714 let mut r2 = create_raw_test_record(b"ACGT", &[20, 20, 20, 20], 200, &cigar);
1715
1716 let result = caller.call_raw(&mut r1, &mut r2).unwrap();
1717 assert!(!result);
1718 }
1719
1720 #[test]
1721 fn test_raw_unmapped_reads() {
1722 let mut caller = OverlappingBasesConsensusCaller::new(
1723 AgreementStrategy::Consensus,
1724 DisagreementStrategy::Consensus,
1725 );
1726
1727 let cigar = [cigar_op(4, 0)]; let mut r1 = make_raw_bam(
1730 b"rea",
1731 fgumi_raw_bam::flags::UNMAPPED,
1732 0,
1733 99,
1734 &cigar,
1735 b"ACGT",
1736 &[30, 30, 30, 30],
1737 );
1738 let mut r2 = create_raw_test_record(b"ACGT", &[20, 20, 20, 20], 100, &cigar);
1739
1740 let result = caller.call_raw(&mut r1, &mut r2).unwrap();
1741 assert!(!result);
1742 }
1743
1744 #[test]
1745 fn test_raw_different_references() {
1746 let mut caller = OverlappingBasesConsensusCaller::new(
1747 AgreementStrategy::Consensus,
1748 DisagreementStrategy::Consensus,
1749 );
1750
1751 let cigar = [cigar_op(4, 0)]; let mut r1 = make_raw_bam(b"rea", 0, 0, 99, &cigar, b"ACGT", &[30, 30, 30, 30]);
1754 let mut r2 = make_raw_bam(b"rea", 0, 1, 99, &cigar, b"ACGT", &[20, 20, 20, 20]);
1755
1756 let result = caller.call_raw(&mut r1, &mut r2).unwrap();
1757 assert!(!result);
1758 }
1759
1760 #[test]
1761 fn test_raw_quality_capping_at_93() {
1762 let mut caller = OverlappingBasesConsensusCaller::new(
1763 AgreementStrategy::Consensus,
1764 DisagreementStrategy::Consensus,
1765 );
1766
1767 let cigar = [cigar_op(4, 0)]; let mut r1 = create_raw_test_record(b"ACGT", &[50, 50, 50, 50], 100, &cigar);
1769 let mut r2 = create_raw_test_record(b"ACGT", &[50, 50, 50, 50], 100, &cigar);
1770
1771 let result = caller.call_raw(&mut r1, &mut r2).unwrap();
1772 assert!(result);
1773
1774 assert_eq!(fgumi_raw_bam::quality_scores_slice(&r1)[0], 93);
1776 }
1777
1778 #[test]
1779 fn test_raw_stats_tracking() {
1780 let mut caller = OverlappingBasesConsensusCaller::new(
1781 AgreementStrategy::Consensus,
1782 DisagreementStrategy::Consensus,
1783 );
1784
1785 let cigar = [cigar_op(4, 0)]; let mut r1 = create_raw_test_record(b"ACGT", &[30, 30, 30, 30], 100, &cigar);
1787 let mut r2 = create_raw_test_record(b"ACGT", &[20, 20, 20, 20], 100, &cigar);
1788
1789 let result = caller.call_raw(&mut r1, &mut r2).unwrap();
1790 assert!(result);
1791
1792 assert!(caller.stats().overlapping_bases > 0);
1793 assert_eq!(caller.stats().bases_agreeing, caller.stats().overlapping_bases);
1794 assert_eq!(caller.stats().bases_disagreeing, 0);
1795 assert_eq!(caller.stats().bases_corrected, caller.stats().overlapping_bases);
1796 }
1797
1798 #[test]
1799 fn test_raw_stats_tracking_with_disagreements() {
1800 let mut caller = OverlappingBasesConsensusCaller::new(
1801 AgreementStrategy::PassThrough,
1802 DisagreementStrategy::Consensus,
1803 );
1804
1805 let cigar = [cigar_op(4, 0)]; let mut r1 = create_raw_test_record(b"ACGT", &[30, 30, 30, 30], 100, &cigar);
1807 let mut r2 = create_raw_test_record(b"TGCA", &[20, 20, 20, 20], 100, &cigar);
1808
1809 let result = caller.call_raw(&mut r1, &mut r2).unwrap();
1810 assert!(result);
1811
1812 assert!(caller.stats().overlapping_bases > 0);
1813 assert_eq!(caller.stats().bases_agreeing, 0);
1814 assert_eq!(caller.stats().bases_disagreeing, caller.stats().overlapping_bases);
1815 }
1816
1817 #[test]
1818 fn test_raw_overlap_different_start_positions() {
1819 let mut caller = OverlappingBasesConsensusCaller::new(
1820 AgreementStrategy::Consensus,
1821 DisagreementStrategy::Consensus,
1822 );
1823
1824 let cigar = [cigar_op(4, 0)]; let mut r1 = create_raw_test_record(b"ACGT", &[30, 30, 30, 30], 100, &cigar);
1828 let mut r2 = create_raw_test_record(b"GTAC", &[20, 20, 20, 20], 102, &cigar);
1829
1830 let result = caller.call_raw(&mut r1, &mut r2).unwrap();
1831 assert!(result);
1832 assert_eq!(caller.stats().overlapping_bases, 2);
1833 assert_eq!(caller.stats().bases_agreeing, 2); assert_eq!(fgumi_raw_bam::quality_scores_slice(&r1)[2], 50); assert_eq!(fgumi_raw_bam::quality_scores_slice(&r1)[3], 50);
1838 }
1839
1840 #[test]
1841 fn test_raw_cigar_with_soft_clips() {
1842 let mut caller = OverlappingBasesConsensusCaller::new(
1843 AgreementStrategy::Consensus,
1844 DisagreementStrategy::Consensus,
1845 );
1846
1847 let r1_cigar = [cigar_op(2, 4), cigar_op(4, 0)]; let r2_cigar = [cigar_op(4, 0)]; let mut r1 = create_raw_test_record(b"NNACGT", &[2, 2, 30, 30, 30, 30], 100, &r1_cigar);
1851 let mut r2 = create_raw_test_record(b"ACGT", &[20, 20, 20, 20], 100, &r2_cigar);
1852
1853 let result = caller.call_raw(&mut r1, &mut r2).unwrap();
1854 assert!(result);
1855 assert_eq!(caller.stats().overlapping_bases, 4);
1856 assert_eq!(caller.stats().bases_agreeing, 4); assert_eq!(fgumi_raw_bam::quality_scores_slice(&r1)[2], 50); assert_eq!(fgumi_raw_bam::quality_scores_slice(&r1)[3], 50);
1861 assert_eq!(fgumi_raw_bam::quality_scores_slice(&r1)[4], 50);
1862 assert_eq!(fgumi_raw_bam::quality_scores_slice(&r1)[5], 50);
1863 }
1864
1865 #[test]
1866 fn test_raw_cigar_with_insertions() {
1867 let mut caller = OverlappingBasesConsensusCaller::new(
1868 AgreementStrategy::Consensus,
1869 DisagreementStrategy::Consensus,
1870 );
1871
1872 let r1_cigar = [cigar_op(2, 0), cigar_op(2, 1), cigar_op(2, 0)]; let r2_cigar = [cigar_op(4, 0)]; let mut r1 = create_raw_test_record(b"ACTTGG", &[30, 30, 30, 30, 30, 30], 100, &r1_cigar);
1876 let mut r2 = create_raw_test_record(b"ACGG", &[20, 20, 20, 20], 100, &r2_cigar);
1877
1878 let result = caller.call_raw(&mut r1, &mut r2).unwrap();
1879 assert!(result);
1880 }
1881
1882 #[test]
1883 fn test_raw_cigar_with_deletions() {
1884 let mut caller = OverlappingBasesConsensusCaller::new(
1885 AgreementStrategy::Consensus,
1886 DisagreementStrategy::Consensus,
1887 );
1888
1889 let r1_cigar = [cigar_op(2, 0), cigar_op(2, 2), cigar_op(2, 0)]; let r2_cigar = [cigar_op(6, 0)]; let mut r1 = create_raw_test_record(b"ACGG", &[30, 30, 30, 30], 100, &r1_cigar);
1893 let mut r2 = create_raw_test_record(b"ACTTGG", &[20, 20, 20, 20, 20, 20], 100, &r2_cigar);
1894
1895 let result = caller.call_raw(&mut r1, &mut r2).unwrap();
1896 assert!(result);
1897 }
1898
1899 #[test]
1900 fn test_raw_disagreement_consensus_min_quality() {
1901 let mut caller = OverlappingBasesConsensusCaller::new(
1902 AgreementStrategy::PassThrough,
1903 DisagreementStrategy::Consensus,
1904 );
1905
1906 let cigar = [cigar_op(4, 0)]; let mut r1 = create_raw_test_record(b"ACGT", &[30, 30, 30, 30], 100, &cigar);
1908 let mut r2 = create_raw_test_record(b"GCTA", &[29, 29, 29, 29], 100, &cigar);
1909
1910 let result = caller.call_raw(&mut r1, &mut r2).unwrap();
1911 assert!(result);
1912
1913 assert_eq!(fgumi_raw_bam::quality_scores_slice(&r1)[0], 2);
1915 assert_eq!(fgumi_raw_bam::quality_scores_slice(&r2)[0], 2);
1916 }
1917
1918 #[test]
1920 fn test_raw_matches_recordbuf_agreement() {
1921 let mut caller_buf = OverlappingBasesConsensusCaller::new(
1922 AgreementStrategy::Consensus,
1923 DisagreementStrategy::Consensus,
1924 );
1925 let mut caller_raw = OverlappingBasesConsensusCaller::new(
1926 AgreementStrategy::Consensus,
1927 DisagreementStrategy::Consensus,
1928 );
1929
1930 let mut rb_r1 = create_test_record(b"ACGT", &[30, 30, 30, 30], 100, "4M");
1932 let mut rb_r2 = create_test_record(b"ACGT", &[20, 20, 20, 20], 100, "4M");
1933 caller_buf.call(&mut rb_r1, &mut rb_r2).unwrap();
1934
1935 let cigar = [cigar_op(4, 0)];
1937 let mut raw_r1 = create_raw_test_record(b"ACGT", &[30, 30, 30, 30], 100, &cigar);
1938 let mut raw_r2 = create_raw_test_record(b"ACGT", &[20, 20, 20, 20], 100, &cigar);
1939 caller_raw.call_raw(&mut raw_r1, &mut raw_r2).unwrap();
1940
1941 assert_eq!(rb_r1.quality_scores().as_ref(), fgumi_raw_bam::quality_scores_slice(&raw_r1));
1943 assert_eq!(rb_r2.quality_scores().as_ref(), fgumi_raw_bam::quality_scores_slice(&raw_r2));
1944 assert_eq!(caller_buf.stats().overlapping_bases, caller_raw.stats().overlapping_bases);
1945 assert_eq!(caller_buf.stats().bases_agreeing, caller_raw.stats().bases_agreeing);
1946 assert_eq!(caller_buf.stats().bases_corrected, caller_raw.stats().bases_corrected);
1947 }
1948
1949 #[test]
1951 fn test_raw_matches_recordbuf_disagreement() {
1952 let mut caller_buf = OverlappingBasesConsensusCaller::new(
1953 AgreementStrategy::PassThrough,
1954 DisagreementStrategy::MaskBoth,
1955 );
1956 let mut caller_raw = OverlappingBasesConsensusCaller::new(
1957 AgreementStrategy::PassThrough,
1958 DisagreementStrategy::MaskBoth,
1959 );
1960
1961 let mut rb_r1 = create_test_record(b"ACGT", &[30, 30, 30, 30], 100, "4M");
1963 let mut rb_r2 = create_test_record(b"GCTA", &[20, 20, 20, 20], 100, "4M");
1964 caller_buf.call(&mut rb_r1, &mut rb_r2).unwrap();
1965
1966 let cigar = [cigar_op(4, 0)];
1968 let mut raw_r1 = create_raw_test_record(b"ACGT", &[30, 30, 30, 30], 100, &cigar);
1969 let mut raw_r2 = create_raw_test_record(b"GCTA", &[20, 20, 20, 20], 100, &cigar);
1970 caller_raw.call_raw(&mut raw_r1, &mut raw_r2).unwrap();
1971
1972 let buf_r1_seq: Vec<u8> = rb_r1.sequence().as_ref().to_vec();
1974 let buf_r2_seq: Vec<u8> = rb_r2.sequence().as_ref().to_vec();
1975 let raw_r1_seq = fgumi_raw_bam::extract_sequence(&raw_r1);
1976 let raw_r2_seq = fgumi_raw_bam::extract_sequence(&raw_r2);
1977 assert_eq!(buf_r1_seq, raw_r1_seq);
1978 assert_eq!(buf_r2_seq, raw_r2_seq);
1979
1980 assert_eq!(rb_r1.quality_scores().as_ref(), fgumi_raw_bam::quality_scores_slice(&raw_r1));
1982 assert_eq!(rb_r2.quality_scores().as_ref(), fgumi_raw_bam::quality_scores_slice(&raw_r2));
1983
1984 assert_eq!(caller_buf.stats().overlapping_bases, caller_raw.stats().overlapping_bases);
1986 assert_eq!(caller_buf.stats().bases_disagreeing, caller_raw.stats().bases_disagreeing);
1987 assert_eq!(caller_buf.stats().bases_corrected, caller_raw.stats().bases_corrected);
1988 }
1989
1990 #[test]
1995 fn test_apply_overlapping_consensus_raw_pair() {
1996 let mut caller = OverlappingBasesConsensusCaller::new(
1997 AgreementStrategy::Consensus,
1998 DisagreementStrategy::Consensus,
1999 );
2000
2001 let cigar = [cigar_op(4, 0)]; let r1_flag = fgumi_raw_bam::flags::PAIRED | fgumi_raw_bam::flags::FIRST_SEGMENT;
2004 let r1 = make_raw_bam(b"rea", r1_flag, 0, 99, &cigar, b"ACGT", &[30, 30, 30, 30]);
2005 let r2_flag = fgumi_raw_bam::flags::PAIRED | fgumi_raw_bam::flags::LAST_SEGMENT;
2007 let r2 = make_raw_bam(b"rea", r2_flag, 0, 99, &cigar, b"ACGT", &[20, 20, 20, 20]);
2008
2009 let mut records = vec![r1, r2];
2010 apply_overlapping_consensus_raw(&mut records, &mut caller).unwrap();
2011
2012 assert_eq!(fgumi_raw_bam::quality_scores_slice(&records[0])[0], 50);
2014 assert_eq!(fgumi_raw_bam::quality_scores_slice(&records[1])[0], 50);
2015 assert!(caller.stats().overlapping_bases > 0);
2016 }
2017
2018 #[test]
2019 fn test_apply_overlapping_consensus_raw_no_pair() {
2020 let mut caller = OverlappingBasesConsensusCaller::new(
2021 AgreementStrategy::Consensus,
2022 DisagreementStrategy::Consensus,
2023 );
2024
2025 let cigar = [cigar_op(4, 0)]; let r1_flag = fgumi_raw_bam::flags::PAIRED | fgumi_raw_bam::flags::FIRST_SEGMENT;
2028 let r1 = make_raw_bam(b"rea", r1_flag, 0, 99, &cigar, b"ACGT", &[30, 30, 30, 30]);
2029 let r2_flag = fgumi_raw_bam::flags::PAIRED | fgumi_raw_bam::flags::LAST_SEGMENT;
2031 let r2 = make_raw_bam(b"reb", r2_flag, 0, 99, &cigar, b"ACGT", &[20, 20, 20, 20]);
2032
2033 let mut records = vec![r1, r2];
2034 apply_overlapping_consensus_raw(&mut records, &mut caller).unwrap();
2035
2036 assert_eq!(fgumi_raw_bam::quality_scores_slice(&records[0])[0], 30);
2038 assert_eq!(fgumi_raw_bam::quality_scores_slice(&records[1])[0], 20);
2039 assert_eq!(caller.stats().overlapping_bases, 0);
2040 }
2041
2042 #[test]
2043 fn test_apply_overlapping_consensus_raw_reversed_indices() {
2044 let mut caller = OverlappingBasesConsensusCaller::new(
2046 AgreementStrategy::Consensus,
2047 DisagreementStrategy::Consensus,
2048 );
2049
2050 let cigar = [cigar_op(4, 0)]; let r2_flag = fgumi_raw_bam::flags::PAIRED | fgumi_raw_bam::flags::LAST_SEGMENT;
2052 let r2 = make_raw_bam(b"rea", r2_flag, 0, 99, &cigar, b"ACGT", &[20, 20, 20, 20]);
2053 let r1_flag = fgumi_raw_bam::flags::PAIRED | fgumi_raw_bam::flags::FIRST_SEGMENT;
2054 let r1 = make_raw_bam(b"rea", r1_flag, 0, 99, &cigar, b"ACGT", &[30, 30, 30, 30]);
2055
2056 let mut records = vec![r2, r1];
2058 apply_overlapping_consensus_raw(&mut records, &mut caller).unwrap();
2059
2060 assert!(caller.stats().overlapping_bases > 0);
2062 assert_eq!(
2063 fgumi_raw_bam::quality_scores_slice(&records[0])[0],
2064 fgumi_raw_bam::quality_scores_slice(&records[1])[0]
2065 );
2066 }
2067
2068 #[test]
2073 fn test_correction_stats_merge() {
2074 let mut stats_a = CorrectionStats {
2075 overlapping_bases: 10,
2076 bases_agreeing: 8,
2077 bases_disagreeing: 2,
2078 bases_corrected: 3,
2079 };
2080
2081 let stats_b = CorrectionStats {
2082 overlapping_bases: 5,
2083 bases_agreeing: 4,
2084 bases_disagreeing: 1,
2085 bases_corrected: 2,
2086 };
2087
2088 stats_a.merge(&stats_b);
2089
2090 assert_eq!(stats_a.overlapping_bases, 15);
2091 assert_eq!(stats_a.bases_agreeing, 12);
2092 assert_eq!(stats_a.bases_disagreeing, 3);
2093 assert_eq!(stats_a.bases_corrected, 5);
2094 }
2095
2096 #[test]
2097 fn test_correction_stats_merge_with_empty() {
2098 let mut stats = CorrectionStats {
2099 overlapping_bases: 10,
2100 bases_agreeing: 8,
2101 bases_disagreeing: 2,
2102 bases_corrected: 3,
2103 };
2104
2105 let empty = CorrectionStats::new();
2106 stats.merge(&empty);
2107
2108 assert_eq!(stats.overlapping_bases, 10);
2110 assert_eq!(stats.bases_agreeing, 8);
2111 assert_eq!(stats.bases_disagreeing, 2);
2112 assert_eq!(stats.bases_corrected, 3);
2113 }
2114
2115 #[test]
2116 fn test_correction_stats_merge_into_empty() {
2117 let mut empty = CorrectionStats::new();
2118
2119 let stats = CorrectionStats {
2120 overlapping_bases: 7,
2121 bases_agreeing: 5,
2122 bases_disagreeing: 2,
2123 bases_corrected: 4,
2124 };
2125
2126 empty.merge(&stats);
2127
2128 assert_eq!(empty.overlapping_bases, 7);
2129 assert_eq!(empty.bases_agreeing, 5);
2130 assert_eq!(empty.bases_disagreeing, 2);
2131 assert_eq!(empty.bases_corrected, 4);
2132 }
2133
2134 #[test]
2135 fn test_correction_stats_merge_multiple() {
2136 let mut total = CorrectionStats::new();
2137
2138 let a = CorrectionStats {
2139 overlapping_bases: 3,
2140 bases_agreeing: 2,
2141 bases_disagreeing: 1,
2142 bases_corrected: 1,
2143 };
2144 let b = CorrectionStats {
2145 overlapping_bases: 5,
2146 bases_agreeing: 5,
2147 bases_disagreeing: 0,
2148 bases_corrected: 5,
2149 };
2150 let c = CorrectionStats {
2151 overlapping_bases: 2,
2152 bases_agreeing: 0,
2153 bases_disagreeing: 2,
2154 bases_corrected: 4,
2155 };
2156
2157 total.merge(&a);
2158 total.merge(&b);
2159 total.merge(&c);
2160
2161 assert_eq!(total.overlapping_bases, 10);
2162 assert_eq!(total.bases_agreeing, 7);
2163 assert_eq!(total.bases_disagreeing, 3);
2164 assert_eq!(total.bases_corrected, 10);
2165 }
2166
2167 #[test]
2168 fn test_kind_to_bam_op() {
2169 assert_eq!(kind_to_bam_op(Kind::Match), 0);
2170 assert_eq!(kind_to_bam_op(Kind::Insertion), 1);
2171 assert_eq!(kind_to_bam_op(Kind::Deletion), 2);
2172 assert_eq!(kind_to_bam_op(Kind::Skip), 3);
2173 assert_eq!(kind_to_bam_op(Kind::SoftClip), 4);
2174 assert_eq!(kind_to_bam_op(Kind::HardClip), 5);
2175 assert_eq!(kind_to_bam_op(Kind::Pad), 6);
2176 assert_eq!(kind_to_bam_op(Kind::SequenceMatch), 7);
2177 assert_eq!(kind_to_bam_op(Kind::SequenceMismatch), 8);
2178 }
2179
2180 #[test]
2181 fn test_unified_iterator_noodles_and_raw_agree() {
2182 let r1 = create_test_record(b"ACGTACGT", &[30; 8], 100, "8M");
2184 let r2 = create_test_record(b"ACGTACGT", &[20; 8], 104, "8M");
2185
2186 let noodles_iter = ReadMateAndRefPosIterator::new(&r1, &r2).unwrap();
2188 let noodles_positions: Vec<_> = noodles_iter.collect();
2189
2190 let cigar_8m = [cigar_op(8, 0)]; let r1_raw = create_raw_test_record(b"ACGTACGT", &[30; 8], 100, &cigar_8m);
2193 let r2_raw = create_raw_test_record(b"ACGTACGT", &[20; 8], 104, &cigar_8m);
2194
2195 let raw_iter = ReadMateAndRefPosIterator::new_raw(
2197 &r1_raw, &r2_raw, 100, 107, 104, 111, );
2200 let raw_positions: Vec<_> = raw_iter.collect();
2201
2202 assert_eq!(noodles_positions.len(), raw_positions.len());
2203 for (n, r) in noodles_positions.iter().zip(raw_positions.iter()) {
2204 assert_eq!(n.read_offset, r.read_offset);
2205 assert_eq!(n.mate_offset, r.mate_offset);
2206 }
2207 assert_eq!(noodles_positions.len(), 4);
2209 }
2210
2211 #[test]
2212 fn test_unified_iterator_with_deletion() {
2213 let r1 = create_test_record(b"ACGTACGT", &[30; 8], 100, "4M2D4M");
2216 let r2 = create_test_record(b"TTTT", &[20; 4], 106, "4M");
2217
2218 let iter = ReadMateAndRefPosIterator::new(&r1, &r2).unwrap();
2219 let positions: Vec<_> = iter.collect();
2220
2221 assert_eq!(positions.len(), 4);
2225 assert_eq!(positions[0].read_offset, 4);
2227 assert_eq!(positions[3].read_offset, 7);
2228 assert_eq!(positions[0].mate_offset, 0);
2230 assert_eq!(positions[3].mate_offset, 3);
2231 }
2232}