use noodles::sam::alignment::RecordBuf;
use noodles::sam::alignment::record::Cigar as CigarTrait;
use noodles::sam::alignment::record::cigar::Op;
use noodles::sam::alignment::record::cigar::op::Kind;
#[must_use]
pub fn count_overlapping_bases(record: &RecordBuf) -> u64 {
let flags = record.flags();
if !flags.is_segmented() || flags.is_unmapped() || flags.is_mate_unmapped() {
return 0;
}
let ref_id = record.reference_sequence_id();
let mate_ref_id = record.mate_reference_sequence_id();
if ref_id != mate_ref_id {
return 0;
}
let alignment_start = match record.alignment_start() {
Some(pos) => pos.get(),
None => return 0,
};
let mate_alignment_start = match record.mate_alignment_start() {
Some(pos) => pos.get(),
None => return 0,
};
if mate_alignment_start < alignment_start {
return 0;
}
if mate_alignment_start == alignment_start && flags.is_first_segment() {
return 0;
}
let mut ref_pos = alignment_start;
let mut bases_to_clip: u64 = 0;
for op_result in record.cigar().iter() {
let op: Op = match op_result {
Ok(op) => op,
Err(_) => return 0,
};
let kind = op.kind();
let len = op.len();
let consumes_ref = matches!(
kind,
Kind::Match
| Kind::SequenceMatch
| Kind::SequenceMismatch
| Kind::Deletion
| Kind::Skip
);
let ref_bases_len = if consumes_ref { len } else { 0 };
let element_end = ref_pos + ref_bases_len;
let past_mate = if ref_bases_len > 0 {
mate_alignment_start < ref_pos + ref_bases_len
} else {
mate_alignment_start <= ref_pos
};
if past_mate {
match kind {
Kind::Match | Kind::SequenceMatch | Kind::SequenceMismatch => {
if mate_alignment_start <= ref_pos {
bases_to_clip += len as u64;
} else {
bases_to_clip += (element_end - mate_alignment_start) as u64;
}
}
Kind::SoftClip | Kind::HardClip | Kind::Pad | Kind::Skip | Kind::Deletion => {
}
Kind::Insertion => {
bases_to_clip += len as u64;
}
}
}
ref_pos += ref_bases_len;
}
bases_to_clip
}
#[cfg(test)]
mod tests {
use super::*;
use noodles::core::Position;
use noodles::sam::alignment::record::cigar::Op;
use noodles::sam::alignment::record::{Flags, MappingQuality};
use noodles::sam::alignment::record_buf::{Cigar, QualityScores, Sequence};
fn make_record(
alignment_start: usize,
mate_start: usize,
cigar_ops: &[Op],
is_first_of_pair: bool,
) -> RecordBuf {
let read_len: usize = cigar_ops
.iter()
.filter(|op| {
matches!(
op.kind(),
Kind::Match
| Kind::SequenceMatch
| Kind::SequenceMismatch
| Kind::Insertion
| Kind::SoftClip
)
})
.map(|op| op.len())
.sum();
let mut flags = Flags::SEGMENTED | Flags::PROPERLY_SEGMENTED;
if is_first_of_pair {
flags |= Flags::FIRST_SEGMENT;
} else {
flags |= Flags::LAST_SEGMENT;
}
let cigar: Cigar = cigar_ops.iter().copied().collect();
let seq: Sequence = vec![b'A'; read_len].into();
let qual = QualityScores::from(vec![30u8; read_len]);
let mq = MappingQuality::new(60).unwrap();
RecordBuf::builder()
.set_name("read1")
.set_flags(flags)
.set_reference_sequence_id(0)
.set_alignment_start(Position::new(alignment_start).unwrap())
.set_mapping_quality(mq)
.set_cigar(cigar)
.set_mate_reference_sequence_id(0)
.set_mate_alignment_start(Position::new(mate_start).unwrap())
.set_template_length(0)
.set_sequence(seq)
.set_quality_scores(qual)
.build()
}
#[test]
fn test_simple_overlap() {
let rec = make_record(1, 6, &[Op::new(Kind::Match, 10)], true);
assert_eq!(count_overlapping_bases(&rec), 5);
}
#[test]
fn test_no_overlap() {
let rec = make_record(1, 20, &[Op::new(Kind::Match, 10)], true);
assert_eq!(count_overlapping_bases(&rec), 0);
}
#[test]
fn test_full_overlap() {
let rec = make_record(1, 1, &[Op::new(Kind::Match, 10)], false);
assert_eq!(count_overlapping_bases(&rec), 10);
}
#[test]
fn test_first_of_pair_tie_not_clipped() {
let rec = make_record(1, 1, &[Op::new(Kind::Match, 10)], true);
assert_eq!(count_overlapping_bases(&rec), 0);
}
#[test]
fn test_right_most_read_not_clipped() {
let rec = make_record(10, 5, &[Op::new(Kind::Match, 10)], true);
assert_eq!(count_overlapping_bases(&rec), 0);
}
#[test]
fn test_unpaired_not_clipped() {
let rec = RecordBuf::builder()
.set_flags(Flags::empty())
.set_reference_sequence_id(0)
.set_alignment_start(Position::new(1).unwrap())
.set_cigar([Op::new(Kind::Match, 10)].into_iter().collect::<Cigar>())
.set_sequence(vec![b'A'; 10].into())
.set_quality_scores(QualityScores::from(vec![30u8; 10]))
.build();
assert_eq!(count_overlapping_bases(&rec), 0);
}
#[test]
fn test_overlap_with_deletion() {
let rec = make_record(
1,
9,
&[Op::new(Kind::Match, 5), Op::new(Kind::Deletion, 2), Op::new(Kind::Match, 5)],
true,
);
assert_eq!(count_overlapping_bases(&rec), 4);
}
#[test]
fn test_overlap_with_insertion() {
let rec = make_record(
1,
4,
&[Op::new(Kind::Match, 5), Op::new(Kind::Insertion, 3), Op::new(Kind::Match, 5)],
true,
);
assert_eq!(count_overlapping_bases(&rec), 10);
}
}