use super::aligned_pairs::{AlignedPair, AlignedPairs, MatchKind};
use super::pileup::RefSeq;
use seqair_types::{Base, BaseQuality, Pos0};
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct MatchedBase {
pub qpos: u32,
pub rpos: Pos0,
pub kind: MatchKind,
pub query: Base,
pub qual: BaseQuality,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct MatchedRef {
pub qpos: u32,
pub rpos: Pos0,
pub kind: MatchKind,
pub query: Base,
pub qual: BaseQuality,
pub ref_base: Option<Base>,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AlignedPairWithRead<'read> {
Match {
qpos: u32,
rpos: Pos0,
kind: MatchKind,
query: Base,
qual: BaseQuality,
},
Insertion { qpos: u32, query: &'read [Base], qual: &'read [BaseQuality] },
Deletion { rpos: Pos0, del_len: u32 },
RefSkip { rpos: Pos0, skip_len: u32 },
SoftClip { qpos: u32, query: &'read [Base], qual: &'read [BaseQuality] },
Padding { len: u32 },
Unknown { code: u8, len: u32 },
}
#[derive(Debug, Clone)]
pub struct AlignedPairsWithRead<'cigar, 'read> {
inner: AlignedPairs<'cigar>,
seq: &'read [Base],
qual: &'read [BaseQuality],
}
impl<'cigar, 'read> AlignedPairsWithRead<'cigar, 'read> {
pub(super) fn new(
inner: AlignedPairs<'cigar>,
seq: &'read [Base],
qual: &'read [BaseQuality],
) -> Self {
Self { inner, seq, qual }
}
pub fn with_soft_clips(mut self) -> Self {
self.inner = self.inner.with_soft_clips();
self
}
pub fn full(mut self) -> Self {
self.inner = self.inner.full();
self
}
pub fn with_reference<'ref_seq>(
self,
ref_seq: &'ref_seq RefSeq,
) -> AlignedPairsWithRef<'cigar, 'read, 'ref_seq> {
AlignedPairsWithRef { inner: self, ref_seq }
}
pub fn matches_only(self) -> MatchedBases<'cigar, 'read> {
MatchedBases { inner: self }
}
fn attach_read(&self, pair: AlignedPair) -> AlignedPairWithRead<'read> {
match pair {
AlignedPair::Match { qpos, rpos, kind } => {
let q = qpos as usize;
AlignedPairWithRead::Match {
qpos,
rpos,
kind,
query: self.seq.get(q).copied().unwrap_or(Base::Unknown),
qual: self.qual.get(q).copied().unwrap_or(BaseQuality::UNAVAILABLE),
}
}
AlignedPair::Insertion { qpos, insert_len } => {
let (q, q_end) = range_for(qpos, insert_len);
AlignedPairWithRead::Insertion {
qpos,
query: self.seq.get(q..q_end).unwrap_or(&[]),
qual: self.qual.get(q..q_end).unwrap_or(&[]),
}
}
AlignedPair::Deletion { rpos, del_len } => {
AlignedPairWithRead::Deletion { rpos, del_len }
}
AlignedPair::RefSkip { rpos, skip_len } => {
AlignedPairWithRead::RefSkip { rpos, skip_len }
}
AlignedPair::SoftClip { qpos, len } => {
let (q, q_end) = range_for(qpos, len);
AlignedPairWithRead::SoftClip {
qpos,
query: self.seq.get(q..q_end).unwrap_or(&[]),
qual: self.qual.get(q..q_end).unwrap_or(&[]),
}
}
AlignedPair::Padding { len } => AlignedPairWithRead::Padding { len },
AlignedPair::Unknown { code, len } => AlignedPairWithRead::Unknown { code, len },
}
}
}
impl<'read> Iterator for AlignedPairsWithRead<'_, 'read> {
type Item = AlignedPairWithRead<'read>;
fn size_hint(&self) -> (usize, Option<usize>) {
self.inner.size_hint()
}
fn next(&mut self) -> Option<Self::Item> {
let pair = self.inner.next()?;
Some(self.attach_read(pair))
}
}
impl ExactSizeIterator for AlignedPairsWithRead<'_, '_> {}
impl std::iter::FusedIterator for AlignedPairsWithRead<'_, '_> {}
#[inline]
fn range_for(qpos: u32, len: u32) -> (usize, usize) {
let start = qpos as usize;
let end = start.saturating_add(len as usize);
(start, end)
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AlignedPairWithRef<'read, 'ref_seq> {
Match {
qpos: u32,
rpos: Pos0,
kind: MatchKind,
query: Base,
qual: BaseQuality,
ref_base: Option<Base>,
},
Insertion { qpos: u32, query: &'read [Base], qual: &'read [BaseQuality] },
Deletion { rpos: Pos0, del_len: u32, ref_bases: Option<&'ref_seq [Base]> },
RefSkip { rpos: Pos0, skip_len: u32 },
SoftClip { qpos: u32, query: &'read [Base], qual: &'read [BaseQuality] },
Padding { len: u32 },
Unknown { code: u8, len: u32 },
}
#[derive(Debug, Clone)]
pub struct AlignedPairsWithRef<'cigar, 'read, 'ref_seq> {
inner: AlignedPairsWithRead<'cigar, 'read>,
ref_seq: &'ref_seq RefSeq,
}
impl<'cigar, 'read, 'ref_seq> AlignedPairsWithRef<'cigar, 'read, 'ref_seq> {
pub fn with_soft_clips(mut self) -> Self {
self.inner = self.inner.with_soft_clips();
self
}
pub fn full(mut self) -> Self {
self.inner = self.inner.full();
self
}
pub fn matches_only(self) -> MatchedRefs<'cigar, 'read, 'ref_seq> {
MatchedRefs { inner: self }
}
}
impl<'read, 'ref_seq> Iterator for AlignedPairsWithRef<'_, 'read, 'ref_seq> {
type Item = AlignedPairWithRef<'read, 'ref_seq>;
fn size_hint(&self) -> (usize, Option<usize>) {
self.inner.size_hint()
}
fn next(&mut self) -> Option<Self::Item> {
let pair = self.inner.next()?;
Some(match pair {
AlignedPairWithRead::Match { qpos, rpos, kind, query, qual } => {
AlignedPairWithRef::Match {
qpos,
rpos,
kind,
query,
qual,
ref_base: self.ref_seq.try_base_at(rpos),
}
}
AlignedPairWithRead::Insertion { qpos, query, qual } => {
AlignedPairWithRef::Insertion { qpos, query, qual }
}
AlignedPairWithRead::Deletion { rpos, del_len } => AlignedPairWithRef::Deletion {
rpos,
del_len,
ref_bases: self.ref_seq.range(rpos, del_len),
},
AlignedPairWithRead::RefSkip { rpos, skip_len } => {
AlignedPairWithRef::RefSkip { rpos, skip_len }
}
AlignedPairWithRead::SoftClip { qpos, query, qual } => {
AlignedPairWithRef::SoftClip { qpos, query, qual }
}
AlignedPairWithRead::Padding { len } => AlignedPairWithRef::Padding { len },
AlignedPairWithRead::Unknown { code, len } => AlignedPairWithRef::Unknown { code, len },
})
}
}
impl ExactSizeIterator for AlignedPairsWithRef<'_, '_, '_> {}
impl std::iter::FusedIterator for AlignedPairsWithRef<'_, '_, '_> {}
#[derive(Debug, Clone)]
pub struct MatchedBases<'cigar, 'read> {
inner: AlignedPairsWithRead<'cigar, 'read>,
}
impl Iterator for MatchedBases<'_, '_> {
type Item = MatchedBase;
fn size_hint(&self) -> (usize, Option<usize>) {
let (_, upper) = self.inner.size_hint();
(0, upper)
}
fn next(&mut self) -> Option<Self::Item> {
loop {
match self.inner.next()? {
AlignedPairWithRead::Match { qpos, rpos, kind, query, qual } => {
return Some(MatchedBase { qpos, rpos, kind, query, qual });
}
_ => continue,
}
}
}
}
impl std::iter::FusedIterator for MatchedBases<'_, '_> {}
#[derive(Debug, Clone)]
pub struct MatchedRefs<'cigar, 'read, 'ref_seq> {
inner: AlignedPairsWithRef<'cigar, 'read, 'ref_seq>,
}
impl Iterator for MatchedRefs<'_, '_, '_> {
type Item = MatchedRef;
fn size_hint(&self) -> (usize, Option<usize>) {
let (_, upper) = self.inner.size_hint();
(0, upper)
}
fn next(&mut self) -> Option<Self::Item> {
loop {
match self.inner.next()? {
AlignedPairWithRef::Match { qpos, rpos, kind, query, qual, ref_base } => {
return Some(MatchedRef { qpos, rpos, kind, query, qual, ref_base });
}
_ => continue,
}
}
}
}
impl std::iter::FusedIterator for MatchedRefs<'_, '_, '_> {}
#[cfg(test)]
#[allow(clippy::arithmetic_side_effects, reason = "test arithmetic on small values")]
mod tests {
use super::super::cigar::{CigarOp, CigarOpType};
use super::super::owned_record::OwnedBamRecord;
use super::super::record_store::RecordStore;
use super::*;
use seqair_types::{BamFlags, Base, BaseQuality, Pos0};
use std::rc::Rc;
fn op(t: CigarOpType, len: u32) -> CigarOp {
CigarOp::new(t, len)
}
fn p0(v: u32) -> Pos0 {
Pos0::new(v).unwrap()
}
fn make_store(
pos: Pos0,
cigar: Vec<CigarOp>,
seq: Vec<Base>,
qual: Vec<BaseQuality>,
) -> RecordStore<()> {
let owned = OwnedBamRecord::builder(0, Some(pos), b"r".to_vec())
.flags(BamFlags::empty())
.cigar(cigar)
.seq(seq)
.qual(qual)
.build()
.unwrap();
let mut buf = Vec::new();
owned.to_bam_bytes(&mut buf).unwrap();
let mut store = RecordStore::<()>::new();
let _ = store.push_raw(&buf, &mut ()).unwrap();
store
}
#[test]
fn with_read_yields_query_base_and_qual_for_match() {
let cigar = vec![op(CigarOpType::Match, 3)];
let seq = vec![Base::A, Base::C, Base::G];
let qual = vec![
BaseQuality::from_byte(30),
BaseQuality::from_byte(31),
BaseQuality::from_byte(32),
];
let store = make_store(p0(100), cigar, seq, qual);
let rec = store.record(0);
let events: Vec<_> = rec.aligned_pairs_with_read(&store).unwrap().collect();
assert_eq!(events.len(), 3);
assert_eq!(
events[0],
AlignedPairWithRead::Match {
qpos: 0,
rpos: p0(100),
kind: MatchKind::Match,
query: Base::A,
qual: BaseQuality::from_byte(30),
}
);
assert_eq!(
events[2],
AlignedPairWithRead::Match {
qpos: 2,
rpos: p0(102),
kind: MatchKind::Match,
query: Base::G,
qual: BaseQuality::from_byte(32),
}
);
}
#[test]
fn with_read_slices_inserted_bases() {
let cigar = vec![
op(CigarOpType::Match, 2),
op(CigarOpType::Insertion, 3),
op(CigarOpType::Match, 2),
];
let seq = vec![Base::A, Base::C, Base::T, Base::G, Base::T, Base::A, Base::A];
let qual: Vec<_> = (0..7).map(|i| BaseQuality::from_byte(20 + i)).collect();
let store = make_store(p0(50), cigar, seq, qual);
let rec = store.record(0);
let events: Vec<_> = rec.aligned_pairs_with_read(&store).unwrap().collect();
assert_eq!(events.len(), 5);
match events[2] {
AlignedPairWithRead::Insertion { qpos, query, qual } => {
assert_eq!(qpos, 2);
assert_eq!(query, &[Base::T, Base::G, Base::T]);
assert_eq!(qual.len(), 3);
assert_eq!(qual[0].as_byte(), 22);
assert_eq!(qual[2].as_byte(), 24);
}
other => panic!("expected Insertion, got {other:?}"),
}
}
#[test]
fn with_read_slices_soft_clip_bases() {
let cigar = vec![op(CigarOpType::SoftClip, 2), op(CigarOpType::Match, 3)];
let seq = vec![Base::Unknown, Base::Unknown, Base::A, Base::C, Base::G];
let qual: Vec<_> = (0..5).map(|_| BaseQuality::from_byte(20)).collect();
let store = make_store(p0(0), cigar, seq, qual);
let rec = store.record(0);
let events: Vec<_> =
rec.aligned_pairs_with_read(&store).unwrap().with_soft_clips().collect();
match events[0] {
AlignedPairWithRead::SoftClip { qpos, query, qual } => {
assert_eq!(qpos, 0);
assert_eq!(query, &[Base::Unknown, Base::Unknown]);
assert_eq!(qual.len(), 2);
}
other => panic!("expected SoftClip, got {other:?}"),
}
}
#[test]
fn with_read_passes_deletion_through_unchanged() {
let cigar = vec![
op(CigarOpType::Match, 2),
op(CigarOpType::Deletion, 3),
op(CigarOpType::Match, 2),
];
let seq = vec![Base::A; 4];
let qual = vec![BaseQuality::from_byte(30); 4];
let store = make_store(p0(100), cigar, seq, qual);
let rec = store.record(0);
let events: Vec<_> = rec.aligned_pairs_with_read(&store).unwrap().collect();
assert_eq!(events.len(), 5);
assert_eq!(events[2], AlignedPairWithRead::Deletion { rpos: p0(102), del_len: 3 });
}
fn make_ref_seq(start: u32, bases: &[Base]) -> RefSeq {
RefSeq::new(Rc::from(bases.to_vec()), p0(start))
}
#[test]
fn with_reference_attaches_ref_base_to_match() {
let cigar = vec![op(CigarOpType::Match, 3)];
let seq = vec![Base::A, Base::T, Base::G];
let qual = vec![BaseQuality::from_byte(30); 3];
let store = make_store(p0(100), cigar, seq, qual);
let rec = store.record(0);
let ref_seq = make_ref_seq(100, &[Base::A, Base::C, Base::G, Base::T]);
let events: Vec<_> =
rec.aligned_pairs_with_read(&store).unwrap().with_reference(&ref_seq).collect();
assert!(matches!(
events[0],
AlignedPairWithRef::Match { ref_base: Some(Base::A), query: Base::A, .. }
));
assert!(matches!(
events[1],
AlignedPairWithRef::Match { ref_base: Some(Base::C), query: Base::T, .. }
));
assert!(matches!(
events[2],
AlignedPairWithRef::Match { ref_base: Some(Base::G), query: Base::G, .. }
));
}
#[test]
fn with_reference_returns_none_outside_loaded_window() {
let cigar = vec![op(CigarOpType::Match, 2)];
let seq = vec![Base::A; 2];
let qual = vec![BaseQuality::from_byte(30); 2];
let store = make_store(p0(100), cigar, seq, qual);
let rec = store.record(0);
let ref_seq = make_ref_seq(200, &[Base::A, Base::C, Base::G]);
let events: Vec<_> =
rec.aligned_pairs_with_read(&store).unwrap().with_reference(&ref_seq).collect();
for ev in &events {
assert!(
matches!(ev, AlignedPairWithRef::Match { ref_base: None, .. }),
"expected ref_base=None, got {ev:?}"
);
}
}
#[test]
fn with_reference_slices_ref_bases_for_deletion() {
let cigar = vec![
op(CigarOpType::Match, 2),
op(CigarOpType::Deletion, 3),
op(CigarOpType::Match, 1),
];
let seq = vec![Base::A; 3];
let qual = vec![BaseQuality::from_byte(30); 3];
let store = make_store(p0(100), cigar, seq, qual);
let rec = store.record(0);
let ref_bases: Vec<Base> = b"ACGTACGTAC".iter().map(|&b| Base::from(b)).collect();
let ref_seq = make_ref_seq(100, &ref_bases);
let events: Vec<_> =
rec.aligned_pairs_with_read(&store).unwrap().with_reference(&ref_seq).collect();
assert_eq!(events.len(), 4);
match events[2] {
AlignedPairWithRef::Deletion { rpos, del_len, ref_bases: Some(bases) } => {
assert_eq!(rpos, p0(102));
assert_eq!(del_len, 3);
assert_eq!(bases, &[Base::G, Base::T, Base::A]);
}
other => panic!("expected Deletion with ref_bases, got {other:?}"),
}
}
#[test]
fn with_reference_returns_none_for_partial_deletion_overlap() {
let cigar = vec![
op(CigarOpType::Match, 2),
op(CigarOpType::Deletion, 3),
op(CigarOpType::Match, 1),
];
let seq = vec![Base::A; 3];
let qual = vec![BaseQuality::from_byte(30); 3];
let store = make_store(p0(100), cigar, seq, qual);
let rec = store.record(0);
let ref_seq = make_ref_seq(100, &[Base::A, Base::C, Base::G]);
let events: Vec<_> =
rec.aligned_pairs_with_read(&store).unwrap().with_reference(&ref_seq).collect();
match events[2] {
AlignedPairWithRef::Deletion { ref_bases: None, .. } => {}
other => panic!("expected Deletion with ref_bases=None, got {other:?}"),
}
}
#[test]
fn with_reference_then_with_soft_clips_chains_correctly() {
let cigar = vec![op(CigarOpType::SoftClip, 2), op(CigarOpType::Match, 3)];
let seq = vec![Base::Unknown, Base::Unknown, Base::A, Base::C, Base::G];
let qual = vec![BaseQuality::from_byte(20); 5];
let store = make_store(p0(100), cigar, seq, qual);
let rec = store.record(0);
let ref_seq = make_ref_seq(100, &[Base::A, Base::C, Base::G]);
let default_events: Vec<_> =
rec.aligned_pairs_with_read(&store).unwrap().with_reference(&ref_seq).collect();
assert_eq!(default_events.len(), 3);
assert!(default_events.iter().all(|e| matches!(e, AlignedPairWithRef::Match { .. })));
let chained_events: Vec<_> = rec
.aligned_pairs_with_read(&store)
.unwrap()
.with_reference(&ref_seq)
.with_soft_clips()
.collect();
assert_eq!(chained_events.len(), 4, "1 SoftClip + 3 Match");
assert!(matches!(chained_events[0], AlignedPairWithRef::SoftClip { .. }));
assert!(matches!(chained_events[1], AlignedPairWithRef::Match { .. }));
assert!(matches!(chained_events[3], AlignedPairWithRef::Match { .. }));
}
#[test]
fn with_reference_full_yields_padding_and_unknown() {
let cigar = vec![
op(CigarOpType::Match, 1),
op(CigarOpType::Padding, 2),
op(CigarOpType::Unknown(9), 3),
op(CigarOpType::Match, 1),
];
let seq = vec![Base::A, Base::C];
let qual = vec![BaseQuality::from_byte(30); 2];
let store = make_store(p0(50), cigar, seq, qual);
let rec = store.record(0);
let ref_seq = make_ref_seq(50, &[Base::A, Base::C]);
let events: Vec<_> =
rec.aligned_pairs_with_read(&store).unwrap().with_reference(&ref_seq).full().collect();
assert_eq!(events.len(), 4);
assert!(matches!(events[0], AlignedPairWithRef::Match { .. }));
assert_eq!(events[1], AlignedPairWithRef::Padding { len: 2 });
assert!(matches!(events[2], AlignedPairWithRef::Unknown { code: 9, len: 3 }));
assert!(matches!(events[3], AlignedPairWithRef::Match { .. }));
}
#[test]
fn with_read_rejects_seq_shorter_than_cigar_query_len() {
let cigar = [op(CigarOpType::Match, 3)];
let seq = vec![Base::A, Base::C];
let qual = vec![BaseQuality::from_byte(30); 2];
let result = AlignedPairs::new(p0(0), &cigar).with_read(&seq, &qual);
match result {
Err(super::super::aligned_pairs::AlignedPairsError::CigarSeqLengthMismatch {
cigar_qlen: 3,
seq_len: 2,
}) => {}
other => panic!("expected CigarSeqLengthMismatch{{3,2}}, got {other:?}"),
}
}
#[test]
fn with_read_rejects_seq_longer_than_cigar_query_len() {
let cigar = [op(CigarOpType::Match, 2)];
let seq = vec![Base::A, Base::C, Base::G];
let qual = vec![BaseQuality::from_byte(30); 3];
let result = AlignedPairs::new(p0(0), &cigar).with_read(&seq, &qual);
assert!(matches!(
result,
Err(super::super::aligned_pairs::AlignedPairsError::CigarSeqLengthMismatch {
cigar_qlen: 2,
seq_len: 3,
})
));
}
#[test]
fn with_read_rejects_qual_length_mismatch() {
let cigar = [op(CigarOpType::Match, 3)];
let seq = vec![Base::A; 3];
let qual = vec![BaseQuality::from_byte(30); 2]; let result = AlignedPairs::new(p0(0), &cigar).with_read(&seq, &qual);
assert!(matches!(
result,
Err(super::super::aligned_pairs::AlignedPairsError::SeqQualLengthMismatch {
seq_len: 3,
qual_len: 2,
})
));
}
#[test]
fn with_read_accepts_empty_qual_as_missing_sentinel() {
let cigar = [op(CigarOpType::Match, 3)];
let seq = vec![Base::A; 3];
let qual: Vec<BaseQuality> = vec![];
let it = AlignedPairs::new(p0(0), &cigar).with_read(&seq, &qual).unwrap();
let events: Vec<_> = it.collect();
for ev in events {
match ev {
AlignedPairWithRead::Match { qual, .. } => {
assert_eq!(qual, BaseQuality::UNAVAILABLE);
}
_ => panic!("expected only Match events for 3M, got {ev:?}"),
}
}
}
#[test]
fn methylation_pattern_finds_c_to_t_conversion() {
let cigar = vec![op(CigarOpType::Match, 3)];
let seq = vec![Base::T, Base::A, Base::G];
let qual = vec![BaseQuality::from_byte(35); 3];
let store = make_store(p0(100), cigar, seq, qual);
let rec = store.record(0);
let ref_seq = make_ref_seq(100, &[Base::C, Base::A, Base::G]);
let mut conversions = 0;
for ev in rec.aligned_pairs_with_read(&store).unwrap().with_reference(&ref_seq) {
if let AlignedPairWithRef::Match {
ref_base: Some(Base::C), query: Base::T, qual, ..
} = ev
&& qual.get().unwrap_or(0) >= 20
{
conversions += 1;
}
}
assert_eq!(conversions, 1, "expected exactly one C→T conversion at pos 100");
}
#[test]
fn with_read_matches_only_yields_matched_bases() {
let cigar = vec![
op(CigarOpType::Match, 2),
op(CigarOpType::Insertion, 1),
op(CigarOpType::Match, 2),
];
let seq = vec![Base::A, Base::C, Base::T, Base::G, Base::Unknown];
let qual = vec![BaseQuality::from_byte(30); 5];
let store = make_store(p0(50), cigar, seq, qual);
let rec = store.record(0);
let matches: Vec<_> = rec.aligned_pairs_with_read(&store).unwrap().matches_only().collect();
assert_eq!(matches.len(), 4, "expected 2M + 2M = 4 match positions");
assert_eq!(matches[0].qpos, 0);
assert_eq!(matches[0].query, Base::A);
assert_eq!(matches[0].kind, MatchKind::Match);
assert_eq!(matches[3].qpos, 4);
assert_eq!(matches[3].query, Base::Unknown);
}
#[test]
fn with_reference_matches_only_yields_matched_refs() {
let cigar = vec![op(CigarOpType::Match, 3)];
let seq = vec![Base::A, Base::T, Base::G];
let qual = vec![BaseQuality::from_byte(30); 3];
let store = make_store(p0(100), cigar, seq, qual);
let rec = store.record(0);
let ref_seq = make_ref_seq(100, &[Base::A, Base::C, Base::G]);
let matches: Vec<_> = rec
.aligned_pairs_with_read(&store)
.unwrap()
.with_reference(&ref_seq)
.matches_only()
.collect();
assert_eq!(matches.len(), 3);
assert_eq!(matches[0].ref_base, Some(Base::A));
assert_eq!(matches[0].query, Base::A);
assert_eq!(matches[1].ref_base, Some(Base::C));
assert_eq!(matches[1].query, Base::T); assert_eq!(matches[2].ref_base, Some(Base::G));
assert_eq!(matches[2].query, Base::G);
}
#[test]
fn matches_only_skips_indels_and_clips() {
let cigar = vec![
op(CigarOpType::SoftClip, 2),
op(CigarOpType::Match, 2),
op(CigarOpType::Insertion, 1),
op(CigarOpType::Deletion, 1),
op(CigarOpType::Match, 2),
];
let seq = vec![Base::A; 7];
let qual = vec![BaseQuality::from_byte(30); 7];
let store = make_store(p0(100), cigar, seq, qual);
let rec = store.record(0);
let ref_seq = make_ref_seq(100, &[Base::A; 10]);
let count = rec
.aligned_pairs_with_read(&store)
.unwrap()
.with_reference(&ref_seq)
.matches_only()
.count();
assert_eq!(count, 4, "only 2M + 2M positions; soft clip + I + D dropped");
}
#[test]
fn realistic_150bp_walk_through_all_layers() {
let cigar = vec![
op(CigarOpType::SoftClip, 2),
op(CigarOpType::Match, 95),
op(CigarOpType::Insertion, 5),
op(CigarOpType::Match, 20),
op(CigarOpType::Deletion, 2),
op(CigarOpType::Match, 28),
];
let kinds = [Base::A, Base::C, Base::G, Base::T];
let seq: Vec<Base> = (0..150).map(|i| kinds[i % 4]).collect();
let qual: Vec<BaseQuality> =
(0..150_u8).map(|i| BaseQuality::from_byte(30u8.saturating_add(i % 10))).collect();
let read_pos = p0(1_000_000);
let store = make_store(read_pos, cigar, seq, qual);
let rec = store.record(0);
let ref_window: Vec<Base> =
(0..145).map(|i| if i % 10 == 0 { Base::C } else { Base::A }).collect();
let ref_seq = RefSeq::new(std::rc::Rc::from(ref_window.clone()), read_pos);
let bare_events: Vec<_> = rec.aligned_pairs(&store).unwrap().collect();
let n_match = bare_events.iter().filter(|e| matches!(e, AlignedPair::Match { .. })).count();
let n_ins =
bare_events.iter().filter(|e| matches!(e, AlignedPair::Insertion { .. })).count();
let n_del =
bare_events.iter().filter(|e| matches!(e, AlignedPair::Deletion { .. })).count();
assert_eq!(n_match, 95 + 20 + 28, "expanded match positions");
assert_eq!(n_ins, 1, "one insertion summary event");
assert_eq!(n_del, 1, "one deletion summary event");
let first_match = bare_events
.iter()
.find_map(|e| if let AlignedPair::Match { qpos, .. } = e { Some(*qpos) } else { None })
.unwrap();
assert_eq!(first_match, 2);
let rich_events: Vec<_> = rec.aligned_pairs_with_read(&store).unwrap().collect();
let insertion = rich_events
.iter()
.find(|e| matches!(e, AlignedPairWithRead::Insertion { .. }))
.unwrap();
match *insertion {
AlignedPairWithRead::Insertion { qpos, query, qual } => {
assert_eq!(qpos, 97, "insertion starts at qpos 97 (after 2S+95M)");
assert_eq!(query.len(), 5);
assert_eq!(qual.len(), 5);
assert_eq!(query[0], Base::C);
assert_eq!(query[4], kinds[(97 + 4) % 4]); }
_ => unreachable!(),
}
let mut conversions = 0;
let mut deletion_ref: Option<Vec<Base>> = None;
for ev in rec.aligned_pairs_with_read(&store).unwrap().with_reference(&ref_seq) {
match ev {
AlignedPairWithRef::Match { ref_base: Some(Base::C), query: Base::T, .. } => {
conversions += 1;
}
AlignedPairWithRef::Deletion { rpos, del_len, ref_bases: Some(bases) } => {
assert_eq!(del_len, 2);
assert_eq!(*rpos, 1_000_115);
deletion_ref = Some(bases.to_vec());
}
_ => {}
}
}
assert!(deletion_ref.is_some(), "expected one Deletion event with ref_bases");
assert_eq!(deletion_ref.unwrap().len(), 2);
assert!(conversions > 0, "expected at least one C→T conversion in 143 match positions");
assert!(conversions < 143, "conversions can't exceed total match positions");
}
mod proptests {
use super::super::super::aligned_pairs::AlignedPairs;
use super::*;
use proptest::prelude::*;
fn arb_cigar_op() -> impl Strategy<Value = CigarOp> {
(0u8..=14u8, 0u32..=10u32).prop_map(|(code, len)| {
let t = CigarOpType::from_bam(code);
CigarOp::new(t, len)
})
}
proptest! {
#[test]
fn with_read_preserves_event_sequence(
ops in proptest::collection::vec(arb_cigar_op(), 0..15),
start in 0u32..=10_000u32,
) {
let qlen: u32 = ops
.iter()
.filter(|o| matches!(
o.op_type(),
CigarOpType::Match
| CigarOpType::Insertion
| CigarOpType::SoftClip
| CigarOpType::SeqMatch
| CigarOpType::SeqMismatch
))
.map(|o| o.len())
.sum();
let seq: Vec<Base> = (0..qlen).map(|_| Base::A).collect();
let qual: Vec<BaseQuality> = (0..qlen).map(|_| BaseQuality::from_byte(30)).collect();
let pos = Pos0::new(start).unwrap();
let bare: Vec<_> = AlignedPairs::new(pos, &ops).with_soft_clips().collect();
let rich_iter = AlignedPairs::new(pos, &ops)
.with_soft_clips()
.with_read(&seq, &qual);
let rich = match rich_iter {
Ok(it) => it.collect::<Vec<_>>(),
Err(e) => {
prop_assert!(false, "with_read validation failed unexpectedly: {e}");
return Ok(());
}
};
prop_assert_eq!(bare.len(), rich.len());
for (b, r) in bare.iter().zip(rich.iter()) {
let consistent = match (b, r) {
(
AlignedPair::Match { qpos: bq, rpos: br, kind: bk },
AlignedPairWithRead::Match { qpos: rq, rpos: rr, kind: rk, .. },
) => bq == rq && br == rr && bk == rk,
(
AlignedPair::Insertion { qpos: bq, insert_len: bl },
AlignedPairWithRead::Insertion { qpos: rq, query, .. },
) => bq == rq && (*bl) as usize == query.len(),
(
AlignedPair::Deletion { rpos: br, del_len: bl },
AlignedPairWithRead::Deletion { rpos: rr, del_len: rl },
) => br == rr && bl == rl,
(
AlignedPair::RefSkip { rpos: br, skip_len: bl },
AlignedPairWithRead::RefSkip { rpos: rr, skip_len: rl },
) => br == rr && bl == rl,
(
AlignedPair::SoftClip { qpos: bq, len: bl },
AlignedPairWithRead::SoftClip { qpos: rq, query, .. },
) => bq == rq && (*bl) as usize == query.len(),
(
AlignedPair::Padding { len: bl },
AlignedPairWithRead::Padding { len: rl },
) => bl == rl,
(
AlignedPair::Unknown { code: bc, len: bl },
AlignedPairWithRead::Unknown { code: rc, len: rl },
) => bc == rc && bl == rl,
_ => false,
};
prop_assert!(consistent, "variant mismatch: bare={:?} rich={:?}", b, r);
}
}
}
proptest! {
#[test]
fn with_read_slice_contents_match_seq_at_qpos(
ops in proptest::collection::vec(arb_cigar_op(), 0..10),
start in 0u32..=10_000u32,
) {
let qlen: u32 = ops
.iter()
.filter(|o| matches!(
o.op_type(),
CigarOpType::Match
| CigarOpType::Insertion
| CigarOpType::SoftClip
| CigarOpType::SeqMatch
| CigarOpType::SeqMismatch
))
.map(|o| o.len())
.sum();
let bases = [Base::A, Base::C, Base::G, Base::T, Base::Unknown];
let seq: Vec<Base> =
(0..qlen).map(|i| bases[(i % 5) as usize]).collect();
let qual: Vec<BaseQuality> = (0..qlen)
.map(|i| BaseQuality::from_byte(u8::try_from(i % 60).unwrap_or(0)))
.collect();
let pos = Pos0::new(start).unwrap();
let it = AlignedPairs::new(pos, &ops)
.with_soft_clips()
.with_read(&seq, &qual);
let it = match it {
Ok(x) => x,
Err(e) => {
prop_assert!(false, "with_read validation failed: {e}");
return Ok(());
}
};
for ev in it {
match ev {
AlignedPairWithRead::Match { qpos, query, qual, .. } => {
let expected_base = bases[(qpos % 5) as usize];
prop_assert_eq!(
query, expected_base,
"Match query at qpos={} != seq[qpos]", qpos
);
let expected_qual = BaseQuality::from_byte((qpos % 60) as u8);
prop_assert_eq!(
qual, expected_qual,
"Match qual at qpos={} != qual[qpos]", qpos
);
}
AlignedPairWithRead::Insertion { qpos, query, qual } => {
for (offset, &b) in query.iter().enumerate() {
let q = qpos as usize + offset;
prop_assert_eq!(
b, bases[q % 5],
"Insertion query[{}] (abs qpos={}) mismatch", offset, q
);
}
for (offset, &q_val) in qual.iter().enumerate() {
let q = qpos as usize + offset;
let q_byte = u8::try_from(q % 60).unwrap_or(0);
prop_assert_eq!(
q_val, BaseQuality::from_byte(q_byte),
"Insertion qual[{}] (abs qpos={}) mismatch", offset, q
);
}
}
AlignedPairWithRead::SoftClip { qpos, query, qual } => {
for (offset, &b) in query.iter().enumerate() {
let q = qpos as usize + offset;
prop_assert_eq!(
b, bases[q % 5],
"SoftClip query[{}] (abs qpos={}) mismatch", offset, q
);
}
for (offset, &q_val) in qual.iter().enumerate() {
let q = qpos as usize + offset;
let q_byte = u8::try_from(q % 60).unwrap_or(0);
prop_assert_eq!(
q_val, BaseQuality::from_byte(q_byte),
"SoftClip qual[{}] (abs qpos={}) mismatch", offset, q
);
}
}
_ => {}
}
}
}
}
use std::rc::Rc;
proptest! {
#[test]
fn with_reference_contract_matches_refseq_lookups(
ops in proptest::collection::vec(arb_cigar_op(), 0..8),
pos_offset in 0u32..=200u32,
ref_start in 0u32..=200u32,
ref_len in 0u32..=300u32,
) {
let pos_offset = pos_offset.min(1_000);
let ref_start = ref_start.min(1_000);
let ref_len = ref_len.min(500);
let qlen: u32 = ops
.iter()
.filter(|o| matches!(
o.op_type(),
CigarOpType::Match
| CigarOpType::Insertion
| CigarOpType::SoftClip
| CigarOpType::SeqMatch
| CigarOpType::SeqMismatch
))
.map(|o| o.len())
.sum();
let seq: Vec<Base> = (0..qlen).map(|_| Base::A).collect();
let qual: Vec<BaseQuality> =
(0..qlen).map(|_| BaseQuality::from_byte(30)).collect();
let ref_bases_kinds = [Base::A, Base::C, Base::G, Base::T];
let ref_buf: Vec<Base> = (0..ref_len)
.map(|i| ref_bases_kinds[(i % 4) as usize])
.collect();
let ref_seq = RefSeq::new(Rc::from(ref_buf), Pos0::new(ref_start).unwrap());
let read_pos = Pos0::new(pos_offset).unwrap();
let it = AlignedPairs::new(read_pos, &ops)
.with_soft_clips()
.with_read(&seq, &qual);
let it = match it {
Ok(x) => x.with_reference(&ref_seq),
Err(e) => {
prop_assert!(false, "with_read validation failed: {e}");
return Ok(());
}
};
for ev in it {
match ev {
AlignedPairWithRef::Match { rpos, ref_base, .. } => {
let expected = ref_seq.try_base_at(rpos);
prop_assert_eq!(
ref_base, expected,
"Match ref_base at rpos={:?} disagrees with try_base_at",
rpos,
);
}
AlignedPairWithRef::Deletion { rpos, del_len, ref_bases } => {
let expected = ref_seq.range(rpos, del_len);
prop_assert_eq!(
ref_bases, expected,
"Deletion ref_bases at rpos={:?}, len={} disagrees with range",
rpos, del_len,
);
}
_ => {}
}
}
}
}
proptest! {
#[test]
fn with_read_matches_only_equals_manual_filter(
ops in proptest::collection::vec(arb_cigar_op(), 0..10),
start in 0u32..=10_000u32,
) {
let qlen: u32 = ops
.iter()
.filter(|o| matches!(
o.op_type(),
CigarOpType::Match
| CigarOpType::Insertion
| CigarOpType::SoftClip
| CigarOpType::SeqMatch
| CigarOpType::SeqMismatch
))
.map(|o| o.len())
.sum();
let seq: Vec<Base> = (0..qlen).map(|_| Base::A).collect();
let qual: Vec<BaseQuality> =
(0..qlen).map(|_| BaseQuality::from_byte(30)).collect();
let pos = Pos0::new(start).unwrap();
let it = AlignedPairs::new(pos, &ops)
.with_soft_clips()
.with_read(&seq, &qual);
let it = match it {
Ok(x) => x,
Err(e) => {
prop_assert!(false, "with_read validation failed: {e}");
return Ok(());
}
};
let manual: Vec<MatchedBase> = it
.clone()
.filter_map(|ev| match ev {
AlignedPairWithRead::Match { qpos, rpos, kind, query, qual } => {
Some(MatchedBase { qpos, rpos, kind, query, qual })
}
_ => None,
})
.collect();
let via_adapter: Vec<MatchedBase> = it.matches_only().collect();
prop_assert_eq!(manual, via_adapter);
}
}
}
}