use super::cigar::{CigarOp, CigarOpType};
use super::record_store::{RecordAccessError, RecordStore, SlimRecord};
use seqair_types::Pos0;
use thiserror::Error;
#[derive(Debug, Error)]
#[non_exhaustive]
pub enum AlignedPairsError {
#[error("record access error")]
Access {
#[from]
source: RecordAccessError,
},
#[error(
"unmapped record (pos = None) has {cigar_ops} CIGAR op(s); only an empty CIGAR is valid \
for unmapped records — check flags and CIGAR consistency before constructing the walk"
)]
UnmappedWithCigar { cigar_ops: usize },
#[error(
"CIGAR query-consuming length {cigar_qlen} != seq length {seq_len} — record's seq slab \
and CIGAR are out of sync"
)]
CigarSeqLengthMismatch { cigar_qlen: u64, seq_len: usize },
#[error(
"qual length {qual_len} != seq length {seq_len} — qual slab must be either empty (BAM \
missing-qual sentinel) or match seq length exactly"
)]
SeqQualLengthMismatch { seq_len: usize, qual_len: usize },
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum MatchKind {
Match,
SeqMatch,
SeqMismatch,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AlignedPair {
Match { qpos: u32, rpos: Pos0, kind: MatchKind },
Insertion { qpos: u32, insert_len: u32 },
Deletion { rpos: Pos0, del_len: u32 },
RefSkip { rpos: Pos0, skip_len: u32 },
SoftClip { qpos: u32, len: u32 },
Padding { len: u32 },
Unknown { code: u8, len: u32 },
}
const _: () = assert!(
std::mem::size_of::<AlignedPair>() <= 16,
"AlignedPair grew past 16 bytes — revisit before accepting the hit"
);
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct MatchPosition {
pub qpos: u32,
pub rpos: Pos0,
pub kind: MatchKind,
}
#[derive(Debug, Clone, Copy, Default)]
pub struct AlignedPairsOptions {
pub soft_clips: bool,
pub padding_and_unknown: bool,
}
#[derive(Debug, Clone)]
pub struct AlignedPairs<'a> {
ops: &'a [CigarOp],
qpos: u32,
rpos: Pos0,
options: AlignedPairsOptions,
expanding: ExpandingState,
}
#[derive(Debug, Clone, Copy)]
enum ExpandingState {
None,
PerBase {
remaining: u32,
kind: MatchKind,
},
}
impl<'a> AlignedPairs<'a> {
pub fn new(rec_pos: Pos0, cigar: &'a [CigarOp]) -> Self {
Self {
ops: cigar,
qpos: 0,
rpos: rec_pos,
options: AlignedPairsOptions::default(),
expanding: ExpandingState::None,
}
}
pub fn with_soft_clips(mut self) -> Self {
self.options.soft_clips = true;
self
}
pub fn full(mut self) -> Self {
self.options.soft_clips = true;
self.options.padding_and_unknown = true;
self
}
pub fn matches_only(self) -> MatchesOnly<'a> {
MatchesOnly { inner: self }
}
pub fn with_read<'read>(
self,
seq: &'read [seqair_types::Base],
qual: &'read [seqair_types::BaseQuality],
) -> Result<super::aligned_pairs_view::AlignedPairsWithRead<'a, 'read>, AlignedPairsError> {
let cigar_remaining_qlen: u64 =
self.ops.iter().filter(|op| op.consumes_query()).map(|op| u64::from(op.len())).sum();
let qpos_consumed = u64::from(self.qpos);
let total_qlen = cigar_remaining_qlen.saturating_add(qpos_consumed);
if total_qlen != seq.len() as u64 {
return Err(AlignedPairsError::CigarSeqLengthMismatch {
cigar_qlen: total_qlen,
seq_len: seq.len(),
});
}
if !qual.is_empty() && qual.len() != seq.len() {
return Err(AlignedPairsError::SeqQualLengthMismatch {
seq_len: seq.len(),
qual_len: qual.len(),
});
}
Ok(super::aligned_pairs_view::AlignedPairsWithRead::new(self, seq, qual))
}
}
impl Iterator for AlignedPairs<'_> {
type Item = AlignedPair;
fn size_hint(&self) -> (usize, Option<usize>) {
let mut count: usize = 0;
if let ExpandingState::PerBase { remaining, .. } = self.expanding {
count = count.saturating_add(remaining as usize);
}
for op in self.ops {
let len = op.len();
if len == 0 {
continue;
}
match op.op_type() {
CigarOpType::Match | CigarOpType::SeqMatch | CigarOpType::SeqMismatch => {
count = count.saturating_add(len as usize);
}
CigarOpType::Insertion | CigarOpType::Deletion | CigarOpType::RefSkip => {
count = count.saturating_add(1);
}
CigarOpType::SoftClip => {
if self.options.soft_clips {
count = count.saturating_add(1);
}
}
CigarOpType::HardClip => {}
CigarOpType::Padding | CigarOpType::Unknown(_) => {
if self.options.padding_and_unknown {
count = count.saturating_add(1);
}
}
}
}
(count, Some(count))
}
fn next(&mut self) -> Option<Self::Item> {
loop {
match std::mem::replace(&mut self.expanding, ExpandingState::None) {
ExpandingState::PerBase { remaining, kind } if remaining > 0 => {
self.expanding =
ExpandingState::PerBase { remaining: remaining.saturating_sub(1), kind };
let qpos = self.qpos;
let rpos = self.rpos;
self.qpos = self.qpos.saturating_add(1);
self.rpos = advance_rpos(self.rpos, 1);
return Some(AlignedPair::Match { qpos, rpos, kind });
}
ExpandingState::PerBase { .. } => {
continue;
}
ExpandingState::None => {}
}
let (op, rest) = self.ops.split_first()?;
self.ops = rest;
let len = op.len();
let op_type = op.op_type();
if len == 0 && !matches!(op_type, CigarOpType::HardClip) {
continue;
}
match op_type {
CigarOpType::Match => {
self.expanding =
ExpandingState::PerBase { remaining: len, kind: MatchKind::Match };
continue;
}
CigarOpType::SeqMatch => {
self.expanding =
ExpandingState::PerBase { remaining: len, kind: MatchKind::SeqMatch };
continue;
}
CigarOpType::SeqMismatch => {
self.expanding =
ExpandingState::PerBase { remaining: len, kind: MatchKind::SeqMismatch };
continue;
}
CigarOpType::Insertion => {
let qpos = self.qpos;
self.qpos = self.qpos.saturating_add(len);
return Some(AlignedPair::Insertion { qpos, insert_len: len });
}
CigarOpType::Deletion => {
let rpos = self.rpos;
self.rpos = advance_rpos(self.rpos, len);
return Some(AlignedPair::Deletion { rpos, del_len: len });
}
CigarOpType::RefSkip => {
let rpos = self.rpos;
self.rpos = advance_rpos(self.rpos, len);
return Some(AlignedPair::RefSkip { rpos, skip_len: len });
}
CigarOpType::SoftClip => {
if self.options.soft_clips {
let qpos = self.qpos;
self.qpos = self.qpos.saturating_add(len);
return Some(AlignedPair::SoftClip { qpos, len });
}
self.qpos = self.qpos.saturating_add(len);
}
CigarOpType::HardClip => {
}
CigarOpType::Padding => {
if self.options.padding_and_unknown {
return Some(AlignedPair::Padding { len });
}
}
CigarOpType::Unknown(code) => {
if self.options.padding_and_unknown {
return Some(AlignedPair::Unknown { code, len });
}
}
}
}
}
}
impl ExactSizeIterator for AlignedPairs<'_> {}
impl std::iter::FusedIterator for AlignedPairs<'_> {}
#[derive(Debug, Clone)]
pub struct MatchesOnly<'a> {
inner: AlignedPairs<'a>,
}
impl Iterator for MatchesOnly<'_> {
type Item = MatchPosition;
fn size_hint(&self) -> (usize, Option<usize>) {
let (_, upper) = self.inner.size_hint();
(0, upper)
}
fn next(&mut self) -> Option<Self::Item> {
loop {
match self.inner.next()? {
AlignedPair::Match { qpos, rpos, kind } => {
return Some(MatchPosition { qpos, rpos, kind });
}
_ => continue,
}
}
}
}
impl std::iter::FusedIterator for MatchesOnly<'_> {}
#[inline]
fn advance_rpos(rpos: Pos0, n: u32) -> Pos0 {
let val = rpos.as_u64().saturating_add(u64::from(n));
let clamped = u32::try_from(val.min(i32::MAX as u64)).unwrap_or(i32::MAX as u32);
Pos0::new(clamped).unwrap_or(rpos)
}
impl SlimRecord {
pub fn aligned_pairs<'store, U>(
&self,
store: &'store RecordStore<U>,
) -> Result<AlignedPairs<'store>, RecordAccessError> {
Ok(AlignedPairs::new(self.pos, self.cigar(store)?))
}
pub fn aligned_pairs_with_read<'store, U>(
&self,
store: &'store RecordStore<U>,
) -> Result<super::aligned_pairs_view::AlignedPairsWithRead<'store, 'store>, AlignedPairsError>
{
let cigar = self.cigar(store)?;
let seq = self.seq(store)?;
let qual = self.qual(store)?;
AlignedPairs::new(self.pos, cigar).with_read(seq, qual)
}
}
impl super::owned_record::OwnedBamRecord {
pub fn aligned_pairs(&self) -> Result<AlignedPairs<'_>, AlignedPairsError> {
match self.pos {
Some(p) => Ok(AlignedPairs::new(p, &self.cigar)),
None if self.cigar.is_empty() => Ok(AlignedPairs::new(Pos0::ZERO, &self.cigar)),
None => Err(AlignedPairsError::UnmappedWithCigar { cigar_ops: self.cigar.len() }),
}
}
pub fn aligned_pairs_with_read(
&self,
) -> Result<super::aligned_pairs_view::AlignedPairsWithRead<'_, '_>, AlignedPairsError> {
let pairs = self.aligned_pairs()?;
pairs.with_read(&self.seq, &self.qual)
}
}
#[cfg(test)]
#[allow(clippy::arithmetic_side_effects, reason = "test arithmetic on known small values")]
mod tests {
use super::super::cigar::CigarOp as Op;
use super::*;
fn m(len: u32) -> Op {
Op::new(CigarOpType::Match, len)
}
fn ins(len: u32) -> Op {
Op::new(CigarOpType::Insertion, len)
}
fn del(len: u32) -> Op {
Op::new(CigarOpType::Deletion, len)
}
fn skip(len: u32) -> Op {
Op::new(CigarOpType::RefSkip, len)
}
fn soft(len: u32) -> Op {
Op::new(CigarOpType::SoftClip, len)
}
fn hard(len: u32) -> Op {
Op::new(CigarOpType::HardClip, len)
}
fn pad(len: u32) -> Op {
Op::new(CigarOpType::Padding, len)
}
fn unk(code: u8, len: u32) -> Op {
Op::new(CigarOpType::Unknown(code), len)
}
fn p0(v: u32) -> Pos0 {
Pos0::new(v).unwrap()
}
fn match_m(qpos: u32, rpos: Pos0) -> AlignedPair {
AlignedPair::Match { qpos, rpos, kind: MatchKind::Match }
}
fn match_eq(qpos: u32, rpos: Pos0) -> AlignedPair {
AlignedPair::Match { qpos, rpos, kind: MatchKind::SeqMatch }
}
fn match_x(qpos: u32, rpos: Pos0) -> AlignedPair {
AlignedPair::Match { qpos, rpos, kind: MatchKind::SeqMismatch }
}
fn seq_match(len: u32) -> Op {
Op::new(CigarOpType::SeqMatch, len)
}
fn seq_mismatch(len: u32) -> Op {
Op::new(CigarOpType::SeqMismatch, len)
}
#[test]
fn simple_match() {
let cigar = [m(5)];
let pairs: Vec<_> = AlignedPairs::new(p0(100), &cigar).collect();
assert_eq!(pairs.len(), 5);
assert_eq!(pairs[0], match_m(0, p0(100)));
assert_eq!(pairs[4], match_m(4, p0(104)));
}
#[test]
fn match_kind_is_preserved_per_op() {
let cigar = [m(2), seq_match(2), seq_mismatch(2)];
let pairs: Vec<_> = AlignedPairs::new(p0(0), &cigar).collect();
assert_eq!(pairs.len(), 6);
assert_eq!(pairs[0], match_m(0, p0(0)));
assert_eq!(pairs[1], match_m(1, p0(1)));
assert_eq!(pairs[2], match_eq(2, p0(2)));
assert_eq!(pairs[3], match_eq(3, p0(3)));
assert_eq!(pairs[4], match_x(4, p0(4)));
assert_eq!(pairs[5], match_x(5, p0(5)));
}
#[test]
fn match_kind_does_not_change_qpos_rpos() {
let mixed = [m(2), seq_match(3), seq_mismatch(2), m(1)];
let collapsed = [m(8)];
let strip_kind = |pairs: Vec<AlignedPair>| -> Vec<(u32, u32)> {
pairs
.iter()
.filter_map(|p| match p {
AlignedPair::Match { qpos, rpos, .. } => Some((*qpos, **rpos)),
_ => None,
})
.collect()
};
let mixed_positions = strip_kind(AlignedPairs::new(p0(50), &mixed).collect());
let collapsed_positions = strip_kind(AlignedPairs::new(p0(50), &collapsed).collect());
assert_eq!(mixed_positions, collapsed_positions, "kind must not affect qpos/rpos");
let kinds: Vec<MatchKind> = AlignedPairs::new(p0(50), &mixed)
.filter_map(|p| match p {
AlignedPair::Match { kind, .. } => Some(kind),
_ => None,
})
.collect();
assert!(kinds.contains(&MatchKind::Match), "mixed walk must have M");
assert!(kinds.contains(&MatchKind::SeqMatch), "mixed walk must have =");
assert!(kinds.contains(&MatchKind::SeqMismatch), "mixed walk must have X");
}
#[test]
fn with_insertion() {
let cigar = [m(2), ins(1), m(2)];
let pairs: Vec<_> = AlignedPairs::new(p0(100), &cigar).collect();
assert_eq!(pairs.len(), 5, "2M + 1I(summary) + 2M = 5 items");
assert_eq!(pairs[0], match_m(0, p0(100)));
assert_eq!(pairs[1], match_m(1, p0(101)));
assert_eq!(pairs[2], AlignedPair::Insertion { qpos: 2, insert_len: 1 });
assert_eq!(pairs[3], match_m(3, p0(102)));
assert_eq!(pairs[4], match_m(4, p0(103)));
}
#[test]
fn with_deletion() {
let cigar = [m(2), del(3), m(2)];
let pairs: Vec<_> = AlignedPairs::new(p0(100), &cigar).collect();
assert_eq!(pairs.len(), 5, "2M + 1D(summary) + 2M = 5 items");
assert_eq!(pairs[0], match_m(0, p0(100)));
assert_eq!(pairs[1], match_m(1, p0(101)));
assert_eq!(pairs[2], AlignedPair::Deletion { rpos: p0(102), del_len: 3 });
assert_eq!(pairs[3], match_m(2, p0(105)));
assert_eq!(pairs[4], match_m(3, p0(106)));
}
#[test]
fn with_refskip() {
let cigar = [m(2), skip(5), m(3)];
let pairs: Vec<_> = AlignedPairs::new(p0(100), &cigar).collect();
assert_eq!(pairs.len(), 6, "2M + 1N(summary) + 3M = 6 items");
assert_eq!(pairs[2], AlignedPair::RefSkip { rpos: p0(102), skip_len: 5 });
assert_eq!(pairs[3], match_m(2, p0(107)));
}
#[test]
fn qpos_includes_soft_clips() {
let cigar = [soft(5), m(3)];
let pairs: Vec<_> = AlignedPairs::new(p0(0), &cigar).collect();
assert_eq!(pairs.len(), 3);
assert_eq!(pairs[0], match_m(5, p0(0)));
assert_eq!(pairs[2], match_m(7, p0(2)));
}
#[test]
fn hard_clips_ignored() {
let cigar = [hard(3), m(2), hard(1)];
let pairs: Vec<_> = AlignedPairs::new(p0(0), &cigar).collect();
assert_eq!(pairs.len(), 2);
assert_eq!(pairs[0], match_m(0, p0(0)));
assert_eq!(pairs[1], match_m(1, p0(1)));
}
#[test]
fn soft_clips_hidden_by_default() {
let cigar = [soft(3), m(2)];
let pairs: Vec<_> = AlignedPairs::new(p0(0), &cigar).collect();
assert_eq!(pairs.len(), 2);
assert!(pairs.iter().all(|p| matches!(p, AlignedPair::Match { .. })));
}
#[test]
fn with_soft_clips_enables_them() {
let cigar = [soft(3), m(2)];
let pairs: Vec<_> = AlignedPairs::new(p0(0), &cigar).with_soft_clips().collect();
assert_eq!(pairs.len(), 3, "SoftClip(summary) + 2M");
assert_eq!(pairs[0], AlignedPair::SoftClip { qpos: 0, len: 3 });
assert_eq!(pairs[1], match_m(3, p0(0)));
}
#[test]
fn full_yields_everything() {
let cigar = [soft(1), m(1), pad(2), ins(1), unk(9, 2)];
let pairs: Vec<_> = AlignedPairs::new(p0(0), &cigar).full().collect();
assert_eq!(pairs.len(), 5);
assert!(matches!(pairs[0], AlignedPair::SoftClip { .. }));
assert!(matches!(pairs[1], AlignedPair::Match { .. }));
assert_eq!(pairs[2], AlignedPair::Padding { len: 2 });
assert!(matches!(pairs[3], AlignedPair::Insertion { .. }));
assert!(matches!(pairs[4], AlignedPair::Unknown { code: 9, len: 2 }));
}
#[test]
fn full_yields_padding_and_unknown() {
let cigar = [m(1), pad(3), unk(13, 4), m(1)];
let pairs: Vec<_> = AlignedPairs::new(p0(0), &cigar).full().collect();
assert_eq!(pairs.len(), 4);
assert!(matches!(pairs[0], AlignedPair::Match { .. }));
assert_eq!(pairs[1], AlignedPair::Padding { len: 3 });
assert!(matches!(pairs[2], AlignedPair::Unknown { code: 13, len: 4 }));
assert!(matches!(pairs[3], AlignedPair::Match { .. }));
}
#[test]
fn default_skips_padding_and_unknown() {
let cigar = [m(1), pad(1), unk(13, 4), m(1)];
let pairs: Vec<_> = AlignedPairs::new(p0(0), &cigar).collect();
assert_eq!(pairs.len(), 2);
assert!(matches!(pairs[0], AlignedPair::Match { .. }));
assert!(matches!(pairs[1], AlignedPair::Match { .. }));
}
#[test]
fn empty_cigar() {
let pairs: Vec<_> = AlignedPairs::new(p0(0), &[]).collect();
assert!(pairs.is_empty());
}
#[test]
fn zero_length_match_skipped() {
let cigar = [m(0), m(2)];
let pairs: Vec<_> = AlignedPairs::new(p0(0), &cigar).collect();
assert_eq!(pairs.len(), 2);
assert_eq!(pairs[0], match_m(0, p0(0)));
}
#[test]
fn zero_length_indel_skipped() {
let cigar = [m(1), ins(0), del(0), skip(0), Op::new(CigarOpType::SoftClip, 0), m(1)];
let pairs: Vec<_> = AlignedPairs::new(p0(0), &cigar).with_soft_clips().collect();
assert_eq!(pairs.len(), 2, "only the two Match positions remain");
assert_eq!(pairs[0], match_m(0, p0(0)));
assert_eq!(pairs[1], match_m(1, p0(1)));
}
#[test]
fn zero_length_padding_and_unknown_skipped() {
let cigar = [m(1), pad(0), unk(9, 0), m(1)];
let pairs: Vec<_> = AlignedPairs::new(p0(0), &cigar).full().collect();
assert_eq!(pairs.len(), 2);
assert!(matches!(pairs[0], AlignedPair::Match { .. }));
assert!(matches!(pairs[1], AlignedPair::Match { .. }));
}
#[test]
fn matches_only_drops_indels_and_keeps_match_kind() {
let cigar = [m(2), ins(3), seq_match(2), del(1), seq_mismatch(2)];
let matches: Vec<_> = AlignedPairs::new(p0(100), &cigar).matches_only().collect();
assert_eq!(matches.len(), 6);
assert_eq!(matches[0].kind, MatchKind::Match);
assert_eq!(matches[1].kind, MatchKind::Match);
assert_eq!(matches[2].kind, MatchKind::SeqMatch);
assert_eq!(matches[3].kind, MatchKind::SeqMatch);
assert_eq!(matches[4].kind, MatchKind::SeqMismatch);
assert_eq!(matches[5].kind, MatchKind::SeqMismatch);
for window in matches.windows(2) {
assert!(window[0].qpos <= window[1].qpos);
assert!(*window[0].rpos <= *window[1].rpos);
}
}
#[test]
fn matches_only_count_equals_filtered_match_events() {
let cigar = [soft(2), m(5), ins(3), m(4), del(2), m(3)];
let bare_match_count = AlignedPairs::new(p0(0), &cigar)
.filter(|p| matches!(p, AlignedPair::Match { .. }))
.count();
let matches_only_count = AlignedPairs::new(p0(0), &cigar).matches_only().count();
assert_eq!(bare_match_count, matches_only_count);
assert_eq!(matches_only_count, 12);
}
#[test]
fn size_hint_exact_for_match_only_cigar() {
let cigar = [m(5)];
let it = AlignedPairs::new(p0(0), &cigar);
assert_eq!(it.size_hint(), (5, Some(5)));
assert_eq!(it.len(), 5);
}
#[test]
fn size_hint_exact_with_indels_and_clips() {
let cigar = [soft(2), m(5), ins(3), m(4), del(2), m(3)];
let it = AlignedPairs::new(p0(0), &cigar);
assert_eq!(it.size_hint(), (14, Some(14)));
}
#[test]
fn size_hint_includes_soft_clips_when_enabled() {
let cigar = [soft(2), m(5)];
assert_eq!(AlignedPairs::new(p0(0), &cigar).len(), 5);
assert_eq!(AlignedPairs::new(p0(0), &cigar).with_soft_clips().len(), 6);
}
#[test]
fn size_hint_skips_zero_length_ops() {
let cigar = [m(0), m(3), ins(0), m(2)];
assert_eq!(AlignedPairs::new(p0(0), &cigar).len(), 5);
}
#[test]
fn size_hint_decrements_with_iteration() {
let cigar = [m(5), del(1), m(3)];
let mut it = AlignedPairs::new(p0(0), &cigar);
assert_eq!(it.len(), 9); let _ = it.next();
assert_eq!(it.len(), 8);
let _ = it.next();
assert_eq!(it.len(), 7);
while it.next().is_some() {}
assert_eq!(it.len(), 0);
}
#[test]
fn collect_pre_allocates_with_size_hint() {
let cigar = [m(100)];
let collected: Vec<_> = AlignedPairs::new(p0(0), &cigar).collect();
assert_eq!(collected.len(), 100);
assert!(collected.capacity() >= 100);
}
#[test]
fn clone_lets_caller_walk_twice() {
let cigar = [m(3), ins(1), m(2)];
let it = AlignedPairs::new(p0(50), &cigar);
let it_clone = it.clone();
let first: Vec<_> = it.collect();
let second: Vec<_> = it_clone.collect();
assert_eq!(first, second);
}
#[test]
fn deletion_followed_by_insertion_yields_two_events() {
let cigar = [del(2), ins(3)];
let pairs: Vec<_> = AlignedPairs::new(p0(100), &cigar).collect();
assert_eq!(pairs.len(), 2);
assert_eq!(pairs[0], AlignedPair::Deletion { rpos: p0(100), del_len: 2 });
assert_eq!(pairs[1], AlignedPair::Insertion { qpos: 0, insert_len: 3 });
}
#[test]
fn complex_cigar_walk() {
let cigar = [soft(2), m(3), ins(1), m(2), del(1), skip(1), m(2)];
let pairs: Vec<_> = AlignedPairs::new(p0(50), &cigar).collect();
let expected = vec![
match_m(2, p0(50)), match_m(3, p0(51)),
match_m(4, p0(52)),
AlignedPair::Insertion { qpos: 5, insert_len: 1 },
match_m(6, p0(53)),
match_m(7, p0(54)),
AlignedPair::Deletion { rpos: p0(55), del_len: 1 },
AlignedPair::RefSkip { rpos: p0(56), skip_len: 1 },
match_m(8, p0(57)),
match_m(9, p0(58)),
];
assert_eq!(pairs, expected);
}
#[test]
fn qpos_is_monotone_nondecreasing() {
let cigar = [soft(2), m(3), ins(1), m(2), del(2), m(1)];
let pairs: Vec<_> = AlignedPairs::new(p0(0), &cigar).with_soft_clips().collect();
let qposes: Vec<u32> = pairs
.iter()
.filter_map(|p| match p {
AlignedPair::Match { qpos, .. } => Some(*qpos),
AlignedPair::Insertion { qpos, .. } => Some(*qpos),
AlignedPair::SoftClip { qpos, .. } => Some(*qpos),
_ => None,
})
.collect();
assert!(!qposes.is_empty());
assert!(qposes.windows(2).all(|w| w[0] <= w[1]));
}
#[test]
fn rpos_is_monotone_nondecreasing() {
let cigar = [m(3), del(2), skip(1), m(2)];
let pairs: Vec<_> = AlignedPairs::new(p0(100), &cigar).collect();
let rposes: Vec<u32> = pairs
.iter()
.filter_map(|p| match p {
AlignedPair::Match { rpos, .. } => Some(**rpos),
AlignedPair::Deletion { rpos, .. } => Some(**rpos),
AlignedPair::RefSkip { rpos, .. } => Some(**rpos),
_ => None,
})
.collect();
assert!(!rposes.is_empty());
assert!(rposes.windows(2).all(|w| w[0] <= w[1]));
}
fn oracle_walk(
cigar: &[CigarOp],
rec_pos: Pos0,
options: AlignedPairsOptions,
) -> Vec<AlignedPair> {
let mut pairs = Vec::new();
let mut qpos = 0u32;
let mut rpos = rec_pos;
for op in cigar {
let len = op.len();
if len == 0 && !matches!(op.op_type(), CigarOpType::HardClip) {
continue;
}
let match_kind = match op.op_type() {
CigarOpType::Match => Some(MatchKind::Match),
CigarOpType::SeqMatch => Some(MatchKind::SeqMatch),
CigarOpType::SeqMismatch => Some(MatchKind::SeqMismatch),
_ => None,
};
match op.op_type() {
CigarOpType::Match | CigarOpType::SeqMatch | CigarOpType::SeqMismatch => {
let kind = match_kind.expect("set above for M/=/X");
for i in 0..len {
pairs.push(AlignedPair::Match {
qpos: qpos.saturating_add(i),
rpos: advance_rpos(rpos, i),
kind,
});
}
qpos = qpos.saturating_add(len);
rpos = advance_rpos(rpos, len);
}
CigarOpType::Insertion => {
pairs.push(AlignedPair::Insertion { qpos, insert_len: len });
qpos = qpos.saturating_add(len);
}
CigarOpType::Deletion => {
pairs.push(AlignedPair::Deletion { rpos, del_len: len });
rpos = advance_rpos(rpos, len);
}
CigarOpType::RefSkip => {
pairs.push(AlignedPair::RefSkip { rpos, skip_len: len });
rpos = advance_rpos(rpos, len);
}
CigarOpType::SoftClip => {
if options.soft_clips {
pairs.push(AlignedPair::SoftClip { qpos, len });
}
qpos = qpos.saturating_add(len);
}
CigarOpType::HardClip => {
}
CigarOpType::Padding => {
if options.padding_and_unknown {
pairs.push(AlignedPair::Padding { len });
}
}
CigarOpType::Unknown(code) => {
if options.padding_and_unknown {
pairs.push(AlignedPair::Unknown { code, len });
}
}
}
}
pairs
}
#[test]
fn oracle_matches_iterator_default() {
let cigars: &[&[CigarOp]] = &[
&[m(5)],
&[m(2), ins(1), m(2)],
&[m(2), del(3), m(2)],
&[soft(2), m(3)],
&[m(2), skip(5), m(3)],
&[del(2), ins(3)],
&[hard(3), m(2)],
&[m(1), pad(2), m(1)],
&[],
&[soft(1), m(1), ins(1), del(1), skip(1), m(1)],
&[m(1), seq_match(2), seq_mismatch(1), m(1)],
];
for cigar in cigars {
let actual: Vec<_> = AlignedPairs::new(p0(100), cigar).collect();
let expected = oracle_walk(cigar, p0(100), AlignedPairsOptions::default());
assert_eq!(actual, expected, "mismatch for CIGAR: {cigar:?}");
}
}
#[test]
fn oracle_matches_iterator_full() {
let cigar = &[soft(1), m(1), pad(2), ins(1), del(1), unk(9, 2), m(1)];
let actual: Vec<_> = AlignedPairs::new(p0(0), cigar).full().collect();
let opts = AlignedPairsOptions { soft_clips: true, padding_and_unknown: true };
let expected = oracle_walk(cigar, p0(0), opts);
assert_eq!(actual, expected);
}
mod proptests {
use super::*;
use proptest::prelude::*;
fn arb_cigar_op() -> impl Strategy<Value = CigarOp> {
(0u8..=14u8, 0u32..=50u32).prop_map(|(code, len)| {
let op_type = CigarOpType::from_bam(code);
CigarOp::new(op_type, len)
})
}
fn arb_cigar() -> impl Strategy<Value = Vec<CigarOp>> {
proptest::collection::vec(arb_cigar_op(), 0..20)
}
fn arb_pos0() -> impl Strategy<Value = Pos0> {
(0u32..=100_000u32).prop_map(|v| Pos0::new(v).unwrap())
}
proptest! {
#[test]
fn matches_oracle_default(
cigar in arb_cigar(),
pos in arb_pos0(),
) {
let actual: Vec<_> = AlignedPairs::new(pos, &cigar).collect();
let expected = oracle_walk(&cigar, pos, AlignedPairsOptions::default());
prop_assert_eq!(actual, expected);
}
}
proptest! {
#[test]
fn matches_oracle_full(
cigar in arb_cigar(),
pos in arb_pos0(),
) {
let actual: Vec<_> = AlignedPairs::new(pos, &cigar).full().collect();
let opts = AlignedPairsOptions {
soft_clips: true,
padding_and_unknown: true,
};
let expected = oracle_walk(&cigar, pos, opts);
prop_assert_eq!(actual, expected);
}
}
proptest! {
#[test]
fn ref_pos_monotone_nondecreasing(
cigar in arb_cigar(),
) {
let pairs: Vec<_> = AlignedPairs::new(Pos0::ZERO, &cigar).collect();
let rposes: Vec<u32> = pairs.iter().filter_map(|p| match p {
AlignedPair::Match { rpos, .. } => Some(**rpos),
AlignedPair::Deletion { rpos, .. } => Some(**rpos),
AlignedPair::RefSkip { rpos, .. } => Some(**rpos),
_ => None,
}).collect();
prop_assert!(rposes.windows(2).all(|w| w[0] <= w[1]));
}
}
proptest! {
#[test]
fn qpos_monotone_nondecreasing(
cigar in arb_cigar(),
) {
let pairs: Vec<_> = AlignedPairs::new(Pos0::ZERO, &cigar).full().collect();
let qposes: Vec<u32> = pairs.iter().filter_map(|p| match p {
AlignedPair::Match { qpos, .. } => Some(*qpos),
AlignedPair::Insertion { qpos, .. } => Some(*qpos),
AlignedPair::SoftClip { qpos, .. } => Some(*qpos),
_ => None,
}).collect();
prop_assert!(qposes.windows(2).all(|w| w[0] <= w[1]));
}
}
}
#[cfg(test)]
mod htslib_tests {
use super::*;
fn expand_iterator(cigar: &[CigarOp], rec_pos: Pos0) -> Vec<(Option<u32>, Option<u32>)> {
let mut result = Vec::new();
for pair in AlignedPairs::new(rec_pos, cigar).with_soft_clips() {
match pair {
AlignedPair::Match { qpos, rpos, .. } => {
result.push((Some(qpos), Some(*rpos)));
}
AlignedPair::Insertion { qpos, insert_len } => {
for i in 0..insert_len {
result.push((Some(qpos.saturating_add(i)), None));
}
}
AlignedPair::Deletion { rpos, del_len } => {
for i in 0..del_len {
result.push((None, Some(*advance_rpos(rpos, i))));
}
}
AlignedPair::RefSkip { rpos, skip_len } => {
for i in 0..skip_len {
result.push((None, Some(*advance_rpos(rpos, i))));
}
}
AlignedPair::SoftClip { qpos, len } => {
for i in 0..len {
result.push((Some(qpos.saturating_add(i)), None));
}
}
AlignedPair::Padding { .. } | AlignedPair::Unknown { .. } => {
}
}
}
result
}
fn htslib_pairs(cigar_str: &str, pos: i64) -> Vec<(Option<u32>, Option<u32>)> {
use rust_htslib::bam::ext::BamRecordExtensions;
use rust_htslib::bam::record::Record;
let cigar_string = build_htslib_cigar(cigar_str);
let mut rec = Record::new();
rec.set_pos(pos);
rec.set_cigar(Some(&cigar_string));
#[expect(
clippy::cast_sign_loss,
clippy::cast_possible_truncation,
reason = "test fixtures: positions are non-negative and ≤ i32::MAX"
)]
let pairs = rec
.aligned_pairs_full()
.map(|arr| (arr[0].map(|v| v as u32), arr[1].map(|v| v as u32)))
.collect();
pairs
}
#[test]
fn matches_htslib_aligned_pairs_full() {
let test_cases: &[(&str, i64)] = &[
("5M", 100),
("3=", 100),
("3X", 100),
("2M1I2M", 100),
("2M3D2M", 100),
("3M2N4M", 100),
("5S3M", 0),
("3S5M2S", 0),
("3M5S", 0),
("3H5M", 100),
("5M2H", 100),
("3H5M2H", 100),
("3H2S5M", 100),
("2H3S5M2S1H", 100),
("2M1D1I2M", 100),
("2M1I2D3M", 100),
("3M1I1D2M", 100),
("2M1D2N1M", 100),
("3=2X1=", 50),
("2S3=1X2=2S", 0),
("1=1X1=1X1M", 50),
("100M", 1_000_000),
("50M50D50M", 0),
];
for &(cigar_str, pos) in test_cases {
#[expect(
clippy::cast_sign_loss,
clippy::cast_possible_truncation,
reason = "test fixtures: positions are non-negative and ≤ i32::MAX"
)]
let pos_u32 = pos as u32;
let our_pairs =
expand_iterator(&parse_cigar_string(cigar_str), Pos0::new(pos_u32).unwrap());
let hts_pairs = htslib_pairs(cigar_str, pos);
assert_eq!(
our_pairs, hts_pairs,
"CIGAR '{cigar_str}' at pos {pos}: seqair AlignedPairs (expanded) \
differs from rust-htslib::aligned_pairs_full"
);
}
}
#[test]
fn match_kind_does_not_change_htslib_shape() {
let cigars = ["5M", "5=", "5X"];
let pos = 100;
let mut expansions = Vec::new();
for s in cigars {
expansions.push(expand_iterator(&parse_cigar_string(s), Pos0::new(pos).unwrap()));
}
assert_eq!(expansions[0], expansions[1], "5M and 5= must expand identically");
assert_eq!(expansions[1], expansions[2], "5= and 5X must expand identically");
}
fn build_htslib_cigar(s: &str) -> rust_htslib::bam::record::CigarString {
use rust_htslib::bam::record::{Cigar, CigarString};
let mut cigars = Vec::new();
let mut num = 0u32;
for b in s.bytes() {
match b {
b'0'..=b'9' => {
num = num.saturating_mul(10).saturating_add(u32::from(b - b'0'));
}
b'M' => {
cigars.push(Cigar::Match(num));
num = 0;
}
b'I' => {
cigars.push(Cigar::Ins(num));
num = 0;
}
b'D' => {
cigars.push(Cigar::Del(num));
num = 0;
}
b'N' => {
cigars.push(Cigar::RefSkip(num));
num = 0;
}
b'S' => {
cigars.push(Cigar::SoftClip(num));
num = 0;
}
b'H' => {
cigars.push(Cigar::HardClip(num));
num = 0;
}
b'P' => {
cigars.push(Cigar::Pad(num));
num = 0;
}
b'=' => {
cigars.push(Cigar::Equal(num));
num = 0;
}
b'X' => {
cigars.push(Cigar::Diff(num));
num = 0;
}
_ => {}
}
}
CigarString(cigars)
}
mod htslib_proptests {
use super::*;
use proptest::prelude::*;
fn arb_op_char_len() -> impl Strategy<Value = (char, u32)> {
let chars = prop_oneof![
Just('M'),
Just('I'),
Just('D'),
Just('N'),
Just('S'),
Just('H'),
Just('='),
Just('X'),
];
(chars, 1u32..=20u32)
}
fn arb_cigar_string() -> impl Strategy<Value = String> {
proptest::collection::vec(arb_op_char_len(), 1..=10).prop_map(|ops| {
let mut s = String::new();
for (c, n) in ops {
s.push_str(&n.to_string());
s.push(c);
}
s
})
}
proptest! {
#[test]
fn matches_htslib_random_cigars(
cigar_str in arb_cigar_string(),
pos in 0i64..=100_000i64,
) {
#[expect(
clippy::cast_sign_loss,
clippy::cast_possible_truncation,
reason = "proptest range 0..=100_000 fits in u32"
)]
let pos_u32 = pos as u32;
let our_pairs = expand_iterator(
&parse_cigar_string(&cigar_str),
Pos0::new(pos_u32).unwrap(),
);
let hts_pairs = htslib_pairs(&cigar_str, pos);
prop_assert_eq!(
our_pairs,
hts_pairs,
"CIGAR '{}' at pos {} diverges from rust-htslib",
cigar_str,
pos,
);
}
}
}
fn parse_cigar_string(s: &str) -> Vec<CigarOp> {
let mut ops = Vec::new();
let mut num = 0u32;
for b in s.bytes() {
match b {
b'0'..=b'9' => {
num = num.saturating_mul(10).saturating_add(u32::from(b - b'0'));
}
b'M' => {
ops.push(Op::new(CigarOpType::Match, num));
num = 0;
}
b'I' => {
ops.push(Op::new(CigarOpType::Insertion, num));
num = 0;
}
b'D' => {
ops.push(Op::new(CigarOpType::Deletion, num));
num = 0;
}
b'N' => {
ops.push(Op::new(CigarOpType::RefSkip, num));
num = 0;
}
b'S' => {
ops.push(Op::new(CigarOpType::SoftClip, num));
num = 0;
}
b'H' => {
ops.push(Op::new(CigarOpType::HardClip, num));
num = 0;
}
b'P' => {
ops.push(Op::new(CigarOpType::Padding, num));
num = 0;
}
b'=' => {
ops.push(Op::new(CigarOpType::SeqMatch, num));
num = 0;
}
b'X' => {
ops.push(Op::new(CigarOpType::SeqMismatch, num));
num = 0;
}
_ => {}
}
}
ops
}
}
mod slim_record_tests {
use super::super::super::owned_record::OwnedBamRecord;
use super::super::super::record_store::RecordStore;
use super::*;
use seqair_types::{BamFlags, Base, BaseQuality};
fn store_with_record(pos: u32, cigar: &[CigarOp], seq_len: usize) -> RecordStore<()> {
let rec = OwnedBamRecord::builder(0, Some(Pos0::new(pos).unwrap()), b"r".to_vec())
.flags(BamFlags::empty())
.cigar(cigar.to_vec())
.seq(vec![Base::A; seq_len])
.qual(vec![BaseQuality::from_byte(30); seq_len])
.build()
.unwrap();
let mut buf = Vec::new();
rec.to_bam_bytes(&mut buf).unwrap();
let mut store = RecordStore::<()>::new();
let _ = store.push_raw(&buf, &mut ()).unwrap();
store
}
#[test]
fn slim_record_round_trips_simple_match() {
let cigar = [m(5)];
let store = store_with_record(100, &cigar, 5);
let rec = store.record(0);
let pairs: Vec<_> = rec.aligned_pairs(&store).unwrap().collect();
assert_eq!(pairs.len(), 5);
assert_eq!(pairs[0], match_m(0, p0(100)));
assert_eq!(pairs[4], match_m(4, p0(104)));
}
#[test]
fn slim_record_matches_owned_record_walk() {
let cigar = vec![m(2), ins(1), m(2), del(1), m(2)];
let owned = OwnedBamRecord::builder(0, Some(Pos0::new(200).unwrap()), b"r".to_vec())
.flags(BamFlags::empty())
.cigar(cigar.clone())
.seq(vec![Base::A; 7])
.qual(vec![BaseQuality::from_byte(30); 7])
.build()
.unwrap();
let mut buf = Vec::new();
owned.to_bam_bytes(&mut buf).unwrap();
let mut store = RecordStore::<()>::new();
let _ = store.push_raw(&buf, &mut ()).unwrap();
let slim = store.record(0);
let slim_pairs: Vec<_> = slim.aligned_pairs(&store).unwrap().collect();
let owned_pairs: Vec<_> = owned.aligned_pairs().unwrap().collect();
assert_eq!(slim_pairs, owned_pairs);
}
}
}