use crate::sam::{SamTag, record_utils, to_smallest_signed_int};
use crate::unified_pipeline::MemoryEstimate;
use anyhow::{Result, anyhow, bail};
use noodles::sam::alignment::record::Cigar;
use noodles::sam::alignment::record::data::field::Tag;
use noodles::sam::alignment::record_buf::RecordBuf;
use noodles::sam::alignment::record_buf::data::field::Value as BufValue;
use std::fmt::Write as _;
pub use crate::sam::PairOrientation;
pub use fgumi_umi::MoleculeId;
#[derive(Debug, Clone)]
pub struct Template {
pub name: Vec<u8>,
pub records: Vec<RecordBuf>,
pub raw_records: Option<Vec<Vec<u8>>>,
pub r1: Option<(usize, usize)>,
pub r2: Option<(usize, usize)>,
pub r1_supplementals: Option<(usize, usize)>,
pub r2_supplementals: Option<(usize, usize)>,
pub r1_secondaries: Option<(usize, usize)>,
pub r2_secondaries: Option<(usize, usize)>,
pub mi: MoleculeId,
}
pub type TemplateBatch = Vec<Template>;
#[derive(Debug, Default)]
pub struct Builder {
name: Option<Vec<u8>>,
r1: Option<RecordBuf>,
r2: Option<RecordBuf>,
r1_supplementals: Vec<RecordBuf>,
r2_supplementals: Vec<RecordBuf>,
r1_secondaries: Vec<RecordBuf>,
r2_secondaries: Vec<RecordBuf>,
}
impl Builder {
fn new() -> Self {
Builder {
name: None,
r1: None,
r2: None,
r1_supplementals: Vec::new(),
r2_supplementals: Vec::new(),
r1_secondaries: Vec::new(),
r2_secondaries: Vec::new(),
}
}
pub fn push(&mut self, record: RecordBuf) -> Result<&mut Builder> {
let record_name_bytes: &[u8] = record.name().map_or(&[], <_ as AsRef<[u8]>>::as_ref);
if let Some(cur_name) = &self.name {
if cur_name.as_slice() != record_name_bytes {
bail!(
"Template name mismatch: found '{record_name_bytes:?}', expected '{cur_name:?}'"
);
}
} else {
self.name = Some(record_name_bytes.to_vec());
}
let flags = record.flags();
let is_r1 = !flags.is_segmented() || flags.is_first_segment();
if is_r1 {
if flags.is_secondary() {
self.r1_secondaries.push(record);
} else if flags.is_supplementary() {
self.r1_supplementals.push(record);
} else if self.r1.is_some() {
return Err(anyhow!(
"Multiple non-secondary, non-supplemental R1 records for read '{:?}'",
self.name.as_ref().expect("name must be set before adding duplicate R1")
));
} else {
self.r1 = Some(record);
}
} else if flags.is_secondary() {
self.r2_secondaries.push(record);
} else if flags.is_supplementary() {
self.r2_supplementals.push(record);
} else if self.r2.is_some() {
return Err(anyhow!(
"Multiple non-secondary, non-supplemental R2 records for read '{:?}'",
self.name.as_ref().expect("name must be set before adding duplicate R2")
));
} else {
self.r2 = Some(record);
}
Ok(self)
}
#[must_use]
pub fn len(&self) -> usize {
let mut count = 0;
count += usize::from(self.r1.is_some());
count += usize::from(self.r2.is_some());
count += self.r1_supplementals.len();
count += self.r2_supplementals.len();
count += self.r1_secondaries.len();
count += self.r2_secondaries.len();
count
}
#[must_use]
pub fn is_empty(&self) -> bool {
self.len() == 0
}
pub fn build(&mut self) -> Result<Template> {
let r1_end: usize = usize::from(self.r1.is_some());
let r2_end: usize = r1_end + usize::from(self.r2.is_some());
let r1_supplementals_end: usize = r2_end + self.r1_supplementals.len();
let r2_supplementals_end: usize = r1_supplementals_end + self.r2_supplementals.len();
let r1_secondaries_end: usize = r2_supplementals_end + self.r1_secondaries.len();
let r2_secondaries_end: usize = r1_secondaries_end + self.r2_secondaries.len();
let r1: Option<(usize, usize)> = if self.r1.is_some() { Some((0, r1_end)) } else { None };
let r2: Option<(usize, usize)> =
if self.r2.is_some() { Some((r1_end, r2_end)) } else { None };
let r1_supplementals: Option<(usize, usize)> = if self.r1_supplementals.is_empty() {
None
} else {
Some((r2_end, r1_supplementals_end))
};
let r2_supplementals: Option<(usize, usize)> = if self.r2_supplementals.is_empty() {
None
} else {
Some((r1_supplementals_end, r2_supplementals_end))
};
let r1_secondaries: Option<(usize, usize)> = if self.r1_secondaries.is_empty() {
None
} else {
Some((r2_supplementals_end, r1_secondaries_end))
};
let r2_secondaries: Option<(usize, usize)> = if self.r2_secondaries.is_empty() {
None
} else {
Some((r1_secondaries_end, r2_secondaries_end))
};
let Some(name) = self.name.take() else {
return Err(anyhow!("No records given to template builder"));
};
let mut records: Vec<RecordBuf> = Vec::with_capacity(self.len());
if let Some(rec) = self.r1.take() {
records.push(rec);
}
if let Some(rec) = self.r2.take() {
records.push(rec);
}
self.r1_supplementals.reverse();
self.r2_supplementals.reverse();
self.r1_secondaries.reverse();
self.r2_secondaries.reverse();
records.append(&mut self.r1_supplementals);
records.append(&mut self.r2_supplementals);
records.append(&mut self.r1_secondaries);
records.append(&mut self.r2_secondaries);
Ok(Template {
name,
records,
raw_records: None,
r1,
r2,
r1_supplementals,
r2_supplementals,
r1_secondaries,
r2_secondaries,
mi: MoleculeId::None,
})
}
}
impl Template {
#[must_use]
pub fn r1(&self) -> Option<&RecordBuf> {
if self.records.is_empty() {
return None;
}
self.r1.map(|_| &self.records[0])
}
#[must_use]
pub fn r2(&self) -> Option<&RecordBuf> {
if self.records.is_empty() {
return None;
}
self.r2.map(|(i, _)| &self.records[i])
}
fn records_in_range(&self, range: Option<(usize, usize)>) -> &[RecordBuf] {
if let Some((start, end)) = range {
if start <= end && end <= self.records.len() {
return &self.records[start..end];
}
}
&[]
}
#[must_use]
pub fn r1_supplementals(&self) -> &[RecordBuf] {
self.records_in_range(self.r1_supplementals)
}
#[must_use]
pub fn r2_supplementals(&self) -> &[RecordBuf] {
self.records_in_range(self.r2_supplementals)
}
#[must_use]
pub fn r1_secondaries(&self) -> &[RecordBuf] {
self.records_in_range(self.r1_secondaries)
}
#[must_use]
pub fn r2_secondaries(&self) -> &[RecordBuf] {
self.records_in_range(self.r2_secondaries)
}
#[must_use]
pub fn new(name: Vec<u8>) -> Self {
Template {
name,
records: Vec::new(),
raw_records: None,
r1: None,
r2: None,
r1_supplementals: None,
r2_supplementals: None,
r1_secondaries: None,
r2_secondaries: None,
mi: MoleculeId::None,
}
}
pub fn from_records<I>(records: I) -> Result<Self>
where
I: IntoIterator<Item = RecordBuf>,
{
let mut builder = Builder::new();
for record in records {
builder.push(record)?;
}
builder.build()
}
pub fn primary_reads(&self) -> impl Iterator<Item = &RecordBuf> {
let records = &self.records;
self.r1.iter().chain(self.r2.iter()).filter_map(move |(start, _)| records.get(*start))
}
#[must_use]
pub fn into_primary_reads(mut self) -> (Option<RecordBuf>, Option<RecordBuf>) {
if self.records.is_empty() {
return (None, None);
}
let r2 = self.r2.and_then(|(i, _)| self.records.get_mut(i).map(std::mem::take));
let r1 = self.r1.and_then(|_| self.records.get_mut(0).map(std::mem::take));
(r1, r2)
}
pub fn all_supplementary_and_secondary(&self) -> impl Iterator<Item = &RecordBuf> {
let records = &self.records;
let iter = self
.r1_supplementals
.iter()
.chain(self.r2_supplementals.iter())
.chain(self.r1_secondaries.iter())
.chain(self.r2_secondaries.iter());
iter.flat_map(move |(start, end)| {
let s = (*start).min(records.len());
let e = (*end).min(records.len());
records[s..e].iter()
})
}
pub fn all_reads(&self) -> impl Iterator<Item = &RecordBuf> {
self.primary_reads().chain(self.all_supplementary_and_secondary())
}
#[must_use]
pub fn into_records(self) -> Vec<RecordBuf> {
self.records
}
pub fn all_r1s(&self) -> impl Iterator<Item = &RecordBuf> {
let records = &self.records;
let iter =
self.r1.iter().chain(self.r1_secondaries.iter()).chain(self.r1_supplementals.iter());
iter.flat_map(move |(start, end)| {
let s = (*start).min(records.len());
let e = (*end).min(records.len());
records[s..e].iter()
})
}
pub fn all_r2s(&self) -> impl Iterator<Item = &RecordBuf> {
let records = &self.records;
let iter =
self.r2.iter().chain(self.r2_secondaries.iter()).chain(self.r2_supplementals.iter());
iter.flat_map(move |(start, end)| {
let s = (*start).min(records.len());
let e = (*end).min(records.len());
records[s..e].iter()
})
}
#[must_use]
pub fn read_count(&self) -> usize {
if let Some(ref rr) = self.raw_records { rr.len() } else { self.records.len() }
}
#[inline]
#[must_use]
pub fn is_raw_byte_mode(&self) -> bool {
self.raw_records.is_some()
}
#[must_use]
pub fn raw_r1(&self) -> Option<&[u8]> {
let rr = self.raw_records.as_ref()?;
self.r1.map(|_| rr[0].as_slice())
}
#[must_use]
pub fn raw_r2(&self) -> Option<&[u8]> {
let rr = self.raw_records.as_ref()?;
self.r2.map(|(i, _)| rr[i].as_slice())
}
#[must_use]
pub fn all_raw_records(&self) -> Option<&[Vec<u8>]> {
self.raw_records.as_deref()
}
#[must_use]
pub fn into_raw_records(self) -> Option<Vec<Vec<u8>>> {
self.raw_records
}
pub fn all_raw_records_mut(&mut self) -> Option<&mut [Vec<u8>]> {
self.raw_records.as_deref_mut()
}
#[allow(clippy::too_many_lines)]
pub fn from_raw_records(mut raw_records: Vec<Vec<u8>>) -> Result<Self> {
use crate::sort::bam_fields;
if raw_records.is_empty() {
bail!("No records given to from_raw_records");
}
for (i, r) in raw_records.iter().enumerate() {
if r.len() < bam_fields::MIN_BAM_RECORD_LEN {
bail!(
"Raw BAM record {i} too short to parse ({} < {})",
r.len(),
bam_fields::MIN_BAM_RECORD_LEN
);
}
let l_rn = r[8] as usize;
if r.len() < 32 + l_rn {
bail!(
"Raw BAM record {i} truncated: l_read_name={l_rn} but only {} bytes after header",
r.len() - 32
);
}
}
let name = bam_fields::read_name(&raw_records[0]).to_vec();
if raw_records.len() == 2 {
let f0 = bam_fields::flags(&raw_records[0]);
let f1 = bam_fields::flags(&raw_records[1]);
let neither_sec_supp =
(f0 | f1) & (bam_fields::flags::SECONDARY | bam_fields::flags::SUPPLEMENTARY) == 0;
if neither_sec_supp {
let is_r1_0 = (f0 & bam_fields::flags::PAIRED) == 0
|| (f0 & bam_fields::flags::FIRST_SEGMENT) != 0;
let is_r1_1 = (f1 & bam_fields::flags::PAIRED) == 0
|| (f1 & bam_fields::flags::FIRST_SEGMENT) != 0;
if is_r1_0 != is_r1_1 {
if !is_r1_0 {
raw_records.swap(0, 1);
}
return Ok(Template {
name,
records: Vec::new(),
raw_records: Some(raw_records),
r1: Some((0, 1)),
r2: Some((1, 2)),
r1_supplementals: None,
r2_supplementals: None,
r1_secondaries: None,
r2_secondaries: None,
mi: MoleculeId::None,
});
}
}
}
for rec in raw_records.iter().skip(1) {
if bam_fields::read_name(rec) != name.as_slice() {
bail!("Template name mismatch in from_raw_records");
}
}
let mut r1_idx: Option<usize> = None;
let mut r2_idx: Option<usize> = None;
let mut r1_supp: Vec<usize> = Vec::new();
let mut r2_supp: Vec<usize> = Vec::new();
let mut r1_sec: Vec<usize> = Vec::new();
let mut r2_sec: Vec<usize> = Vec::new();
for (i, rec) in raw_records.iter().enumerate() {
let flg = bam_fields::flags(rec);
let is_secondary = (flg & bam_fields::flags::SECONDARY) != 0;
let is_supplementary = (flg & bam_fields::flags::SUPPLEMENTARY) != 0;
let is_paired = (flg & bam_fields::flags::PAIRED) != 0;
let is_first = (flg & bam_fields::flags::FIRST_SEGMENT) != 0;
let is_r1 = !is_paired || is_first;
if is_r1 {
if is_secondary {
r1_sec.push(i);
} else if is_supplementary {
r1_supp.push(i);
} else if r1_idx.is_some() {
bail!(
"Multiple non-secondary, non-supplemental R1 records for read '{name:?}'"
);
} else {
r1_idx = Some(i);
}
} else if is_secondary {
r2_sec.push(i);
} else if is_supplementary {
r2_supp.push(i);
} else if r2_idx.is_some() {
bail!("Multiple non-secondary, non-supplemental R2 records for read '{name:?}'");
} else {
r2_idx = Some(i);
}
}
let mut ordered: Vec<Vec<u8>> = Vec::with_capacity(raw_records.len());
let mut take = |idx: usize| -> Vec<u8> { std::mem::take(&mut raw_records[idx]) };
let r1_pair = if let Some(idx) = r1_idx {
ordered.push(take(idx));
Some((ordered.len() - 1, ordered.len()))
} else {
None
};
let r2_pair = if let Some(idx) = r2_idx {
ordered.push(take(idx));
Some((ordered.len() - 1, ordered.len()))
} else {
None
};
let r1_supp_pair = if r1_supp.is_empty() {
None
} else {
r1_supp.reverse();
let start = ordered.len();
for idx in &r1_supp {
ordered.push(take(*idx));
}
Some((start, ordered.len()))
};
let r2_supp_pair = if r2_supp.is_empty() {
None
} else {
r2_supp.reverse();
let start = ordered.len();
for idx in &r2_supp {
ordered.push(take(*idx));
}
Some((start, ordered.len()))
};
let r1_sec_pair = if r1_sec.is_empty() {
None
} else {
r1_sec.reverse();
let start = ordered.len();
for idx in &r1_sec {
ordered.push(take(*idx));
}
Some((start, ordered.len()))
};
let r2_sec_pair = if r2_sec.is_empty() {
None
} else {
r2_sec.reverse();
let start = ordered.len();
for idx in &r2_sec {
ordered.push(take(*idx));
}
Some((start, ordered.len()))
};
Ok(Template {
name,
records: Vec::new(),
raw_records: Some(ordered),
r1: r1_pair,
r2: r2_pair,
r1_supplementals: r1_supp_pair,
r2_supplementals: r2_supp_pair,
r1_secondaries: r1_sec_pair,
r2_secondaries: r2_sec_pair,
mi: MoleculeId::None,
})
}
#[must_use]
pub fn pair_orientation(&self) -> Option<PairOrientation> {
let r1 = self.r1()?;
let r2 = self.r2()?;
if r1.flags().is_unmapped() || r2.flags().is_unmapped() {
return None;
}
if r1.reference_sequence_id() != r2.reference_sequence_id() {
return None;
}
Some(get_pair_orientation(r1))
}
#[allow(clippy::too_many_lines)]
pub fn fix_mate_info(&mut self) -> Result<()> {
if self.records.is_empty() {
return Ok(());
}
let mapq_tag = Tag::from(SamTag::MQ);
let mate_cigar_tag = Tag::from(SamTag::MC);
let mate_score_tag = Tag::from(SamTag::MS);
let align_score_tag = Tag::from(SamTag::AS);
if let (Some((r1_i, _)), Some((r2_i, _))) = (self.r1, self.r2) {
let r1_is_unmapped = self.records[r1_i].flags().is_unmapped();
let r2_is_unmapped = self.records[r2_i].flags().is_unmapped();
let r1_as = self.records[r1_i].data().get(&align_score_tag).and_then(extract_int_value);
let r2_as = self.records[r2_i].data().get(&align_score_tag).and_then(extract_int_value);
if !r1_is_unmapped && !r2_is_unmapped {
self.set_mate_info_both_mapped(r1_i, r2_i, mapq_tag, mate_cigar_tag)?;
} else if r1_is_unmapped && r2_is_unmapped {
self.set_mate_info_both_unmapped(r1_i, r2_i, mapq_tag, mate_cigar_tag);
} else {
self.set_mate_info_one_unmapped(
r1_i,
r2_i,
r1_is_unmapped,
mapq_tag,
mate_cigar_tag,
)?;
}
if let Some(as_value) = r2_as {
self.records[r1_i]
.data_mut()
.insert(mate_score_tag, to_smallest_signed_int(as_value));
}
if let Some(as_value) = r1_as {
self.records[r2_i]
.data_mut()
.insert(mate_score_tag, to_smallest_signed_int(as_value));
}
}
if let Some((r2_i, _)) = self.r2 {
let r2_ref_id = self.records[r2_i].reference_sequence_id();
let r2_pos = self.records[r2_i].alignment_start();
let r2_is_reverse = self.records[r2_i].flags().is_reverse_complemented();
let r2_is_unmapped = self.records[r2_i].flags().is_unmapped();
let r2_tlen = self.records[r2_i].template_length();
let r2_mapq = self.records[r2_i].mapping_quality();
let r2_cigar_str = cigar_to_string(self.records[r2_i].cigar())?;
let r2_as = self.records[r2_i].data().get(&align_score_tag).and_then(extract_int_value);
if let Some((start, end)) = self.r1_supplementals {
for i in start..end {
*self.records[i].mate_reference_sequence_id_mut() = r2_ref_id;
*self.records[i].mate_alignment_start_mut() = r2_pos;
set_mate_flags(&mut self.records[i], r2_is_reverse, r2_is_unmapped);
*self.records[i].template_length_mut() = -r2_tlen;
let r2_mapq_value = r2_mapq.map_or(255, |m| i32::from(u8::from(m)));
self.records[i]
.data_mut()
.insert(mapq_tag, to_smallest_signed_int(r2_mapq_value));
if !r2_cigar_str.is_empty() && r2_cigar_str != "*" && !r2_is_unmapped {
self.records[i]
.data_mut()
.insert(mate_cigar_tag, BufValue::from(r2_cigar_str.clone()));
}
if let Some(as_value) = r2_as {
self.records[i]
.data_mut()
.insert(mate_score_tag, to_smallest_signed_int(as_value));
}
}
}
}
if let Some((r1_i, _)) = self.r1 {
let r1_ref_id = self.records[r1_i].reference_sequence_id();
let r1_pos = self.records[r1_i].alignment_start();
let r1_is_reverse = self.records[r1_i].flags().is_reverse_complemented();
let r1_is_unmapped = self.records[r1_i].flags().is_unmapped();
let r1_tlen = self.records[r1_i].template_length();
let r1_mapq = self.records[r1_i].mapping_quality();
let r1_cigar_str = cigar_to_string(self.records[r1_i].cigar())?;
let r1_as = self.records[r1_i].data().get(&align_score_tag).and_then(extract_int_value);
if let Some((start, end)) = self.r2_supplementals {
for i in start..end {
*self.records[i].mate_reference_sequence_id_mut() = r1_ref_id;
*self.records[i].mate_alignment_start_mut() = r1_pos;
set_mate_flags(&mut self.records[i], r1_is_reverse, r1_is_unmapped);
*self.records[i].template_length_mut() = -r1_tlen;
let r1_mapq_value = r1_mapq.map_or(255, |m| i32::from(u8::from(m)));
self.records[i]
.data_mut()
.insert(mapq_tag, to_smallest_signed_int(r1_mapq_value));
if !r1_cigar_str.is_empty() && r1_cigar_str != "*" && !r1_is_unmapped {
self.records[i]
.data_mut()
.insert(mate_cigar_tag, BufValue::from(r1_cigar_str.clone()));
}
if let Some(as_value) = r1_as {
self.records[i]
.data_mut()
.insert(mate_score_tag, to_smallest_signed_int(as_value));
}
}
}
}
Ok(())
}
#[allow(clippy::too_many_lines)]
pub fn fix_mate_info_raw(&mut self) -> Result<()> {
use crate::sort::bam_fields;
let rr = self
.raw_records
.as_mut()
.ok_or_else(|| anyhow!("fix_mate_info_raw requires raw-byte mode"))?;
if rr.is_empty() {
return Ok(());
}
if let (Some((r1_i, _)), Some((r2_i, _))) = (self.r1, self.r2) {
let r1_is_unmapped = (bam_fields::flags(&rr[r1_i]) & bam_fields::flags::UNMAPPED) != 0;
let r2_is_unmapped = (bam_fields::flags(&rr[r2_i]) & bam_fields::flags::UNMAPPED) != 0;
let r1_as = bam_fields::find_int_tag(bam_fields::aux_data_slice(&rr[r1_i]), b"AS");
let r2_as = bam_fields::find_int_tag(bam_fields::aux_data_slice(&rr[r2_i]), b"AS");
if !r1_is_unmapped && !r2_is_unmapped {
self.set_mate_info_both_mapped_raw(r1_i, r2_i);
} else if r1_is_unmapped && r2_is_unmapped {
self.set_mate_info_both_unmapped_raw(r1_i, r2_i);
} else {
self.set_mate_info_one_unmapped_raw(r1_i, r2_i, r1_is_unmapped);
}
let rr = self.raw_records.as_mut().unwrap();
if let Some(as_value) = r2_as {
if let Ok(v) = i32::try_from(as_value) {
bam_fields::remove_tag(&mut rr[r1_i], b"ms");
bam_fields::append_signed_int_tag(&mut rr[r1_i], b"ms", v);
}
}
if let Some(as_value) = r1_as {
if let Ok(v) = i32::try_from(as_value) {
bam_fields::remove_tag(&mut rr[r2_i], b"ms");
bam_fields::append_signed_int_tag(&mut rr[r2_i], b"ms", v);
}
}
}
if let Some((r2_i, _)) = self.r2 {
let rr = self.raw_records.as_ref().unwrap();
let r2_ref_id = bam_fields::ref_id(&rr[r2_i]);
let r2_pos = bam_fields::pos(&rr[r2_i]);
let r2_flags = bam_fields::flags(&rr[r2_i]);
let r2_is_reverse = (r2_flags & bam_fields::flags::REVERSE) != 0;
let r2_is_unmapped = (r2_flags & bam_fields::flags::UNMAPPED) != 0;
let r2_tlen = bam_fields::template_length(&rr[r2_i]);
let r2_mapq = bam_fields::mapq(&rr[r2_i]);
let r2_cigar_str = bam_fields::cigar_to_string_from_raw(&rr[r2_i]);
let r2_as = bam_fields::find_int_tag(bam_fields::aux_data_slice(&rr[r2_i]), b"AS");
if let Some((start, end)) = self.r1_supplementals {
let rr = self.raw_records.as_mut().unwrap();
for rec in &mut rr[start..end] {
bam_fields::set_mate_ref_id(rec, r2_ref_id);
bam_fields::set_mate_pos(rec, r2_pos);
set_mate_flags_raw(rec, r2_is_reverse, r2_is_unmapped);
bam_fields::set_template_length(rec, -r2_tlen);
let mq_val = if r2_mapq == 255 { 255 } else { i32::from(r2_mapq) };
bam_fields::update_int_tag(rec, b"MQ", mq_val);
if !r2_cigar_str.is_empty() && r2_cigar_str != "*" && !r2_is_unmapped {
bam_fields::update_string_tag(rec, b"MC", r2_cigar_str.as_bytes());
} else {
bam_fields::remove_tag(rec, b"MC");
}
if let Some(as_value) = r2_as {
if let Ok(v) = i32::try_from(as_value) {
bam_fields::remove_tag(rec, b"ms");
bam_fields::append_signed_int_tag(rec, b"ms", v);
}
}
}
}
}
if let Some((r1_i, _)) = self.r1 {
let rr = self.raw_records.as_ref().unwrap();
let r1_ref_id = bam_fields::ref_id(&rr[r1_i]);
let r1_pos = bam_fields::pos(&rr[r1_i]);
let r1_flags = bam_fields::flags(&rr[r1_i]);
let r1_is_reverse = (r1_flags & bam_fields::flags::REVERSE) != 0;
let r1_is_unmapped = (r1_flags & bam_fields::flags::UNMAPPED) != 0;
let r1_tlen = bam_fields::template_length(&rr[r1_i]);
let r1_mapq = bam_fields::mapq(&rr[r1_i]);
let r1_cigar_str = bam_fields::cigar_to_string_from_raw(&rr[r1_i]);
let r1_as = bam_fields::find_int_tag(bam_fields::aux_data_slice(&rr[r1_i]), b"AS");
if let Some((start, end)) = self.r2_supplementals {
let rr = self.raw_records.as_mut().unwrap();
for rec in &mut rr[start..end] {
bam_fields::set_mate_ref_id(rec, r1_ref_id);
bam_fields::set_mate_pos(rec, r1_pos);
set_mate_flags_raw(rec, r1_is_reverse, r1_is_unmapped);
bam_fields::set_template_length(rec, -r1_tlen);
let mq_val = if r1_mapq == 255 { 255 } else { i32::from(r1_mapq) };
bam_fields::update_int_tag(rec, b"MQ", mq_val);
if !r1_cigar_str.is_empty() && r1_cigar_str != "*" && !r1_is_unmapped {
bam_fields::update_string_tag(rec, b"MC", r1_cigar_str.as_bytes());
} else {
bam_fields::remove_tag(rec, b"MC");
}
if let Some(as_value) = r1_as {
if let Ok(v) = i32::try_from(as_value) {
bam_fields::remove_tag(rec, b"ms");
bam_fields::append_signed_int_tag(rec, b"ms", v);
}
}
}
}
}
Ok(())
}
fn set_mate_info_both_mapped_raw(&mut self, r1_i: usize, r2_i: usize) {
use crate::sort::bam_fields;
let rr = self.raw_records.as_ref().unwrap();
let r2_ref_id = bam_fields::ref_id(&rr[r2_i]);
let r2_pos = bam_fields::pos(&rr[r2_i]);
let r2_is_reverse = (bam_fields::flags(&rr[r2_i]) & bam_fields::flags::REVERSE) != 0;
let r2_mapq = bam_fields::mapq(&rr[r2_i]);
let r2_cigar_str = bam_fields::cigar_to_string_from_raw(&rr[r2_i]);
let r1_ref_id = bam_fields::ref_id(&rr[r1_i]);
let r1_pos = bam_fields::pos(&rr[r1_i]);
let r1_is_reverse = (bam_fields::flags(&rr[r1_i]) & bam_fields::flags::REVERSE) != 0;
let r1_mapq = bam_fields::mapq(&rr[r1_i]);
let r1_cigar_str = bam_fields::cigar_to_string_from_raw(&rr[r1_i]);
let insert_size = compute_insert_size_raw(&rr[r1_i], &rr[r2_i]);
let rr = self.raw_records.as_mut().unwrap();
bam_fields::set_mate_ref_id(&mut rr[r1_i], r2_ref_id);
bam_fields::set_mate_pos(&mut rr[r1_i], r2_pos);
set_mate_flags_raw(&mut rr[r1_i], r2_is_reverse, false);
let r2_mq_val = if r2_mapq == 255 { 255 } else { i32::from(r2_mapq) };
bam_fields::update_int_tag(&mut rr[r1_i], b"MQ", r2_mq_val);
if !r2_cigar_str.is_empty() && r2_cigar_str != "*" {
bam_fields::update_string_tag(&mut rr[r1_i], b"MC", r2_cigar_str.as_bytes());
} else {
bam_fields::remove_tag(&mut rr[r1_i], b"MC");
}
bam_fields::set_mate_ref_id(&mut rr[r2_i], r1_ref_id);
bam_fields::set_mate_pos(&mut rr[r2_i], r1_pos);
set_mate_flags_raw(&mut rr[r2_i], r1_is_reverse, false);
let r1_mq_val = if r1_mapq == 255 { 255 } else { i32::from(r1_mapq) };
bam_fields::update_int_tag(&mut rr[r2_i], b"MQ", r1_mq_val);
if !r1_cigar_str.is_empty() && r1_cigar_str != "*" {
bam_fields::update_string_tag(&mut rr[r2_i], b"MC", r1_cigar_str.as_bytes());
} else {
bam_fields::remove_tag(&mut rr[r2_i], b"MC");
}
bam_fields::set_template_length(&mut rr[r1_i], insert_size);
bam_fields::set_template_length(&mut rr[r2_i], -insert_size);
}
fn set_mate_info_both_unmapped_raw(&mut self, r1_i: usize, r2_i: usize) {
use crate::sort::bam_fields;
let rr = self.raw_records.as_ref().unwrap();
let r1_is_reverse = (bam_fields::flags(&rr[r1_i]) & bam_fields::flags::REVERSE) != 0;
let r2_is_reverse = (bam_fields::flags(&rr[r2_i]) & bam_fields::flags::REVERSE) != 0;
let rr = self.raw_records.as_mut().unwrap();
bam_fields::set_ref_id(&mut rr[r1_i], -1);
bam_fields::set_pos(&mut rr[r1_i], -1);
bam_fields::set_mate_ref_id(&mut rr[r1_i], -1);
bam_fields::set_mate_pos(&mut rr[r1_i], -1);
set_mate_flags_raw(&mut rr[r1_i], r2_is_reverse, true);
bam_fields::remove_tag(&mut rr[r1_i], b"MQ");
bam_fields::remove_tag(&mut rr[r1_i], b"MC");
bam_fields::set_template_length(&mut rr[r1_i], 0);
bam_fields::set_ref_id(&mut rr[r2_i], -1);
bam_fields::set_pos(&mut rr[r2_i], -1);
bam_fields::set_mate_ref_id(&mut rr[r2_i], -1);
bam_fields::set_mate_pos(&mut rr[r2_i], -1);
set_mate_flags_raw(&mut rr[r2_i], r1_is_reverse, true);
bam_fields::remove_tag(&mut rr[r2_i], b"MQ");
bam_fields::remove_tag(&mut rr[r2_i], b"MC");
bam_fields::set_template_length(&mut rr[r2_i], 0);
}
fn set_mate_info_one_unmapped_raw(&mut self, r1_i: usize, r2_i: usize, r1_is_unmapped: bool) {
use crate::sort::bam_fields;
let (mapped_i, unmapped_i) = if r1_is_unmapped { (r2_i, r1_i) } else { (r1_i, r2_i) };
let rr = self.raw_records.as_ref().unwrap();
let mapped_ref_id = bam_fields::ref_id(&rr[mapped_i]);
let mapped_pos = bam_fields::pos(&rr[mapped_i]);
let mapped_flags = bam_fields::flags(&rr[mapped_i]);
let mapped_is_reverse = (mapped_flags & bam_fields::flags::REVERSE) != 0;
let mapped_mapq = bam_fields::mapq(&rr[mapped_i]);
let mapped_cigar_str = bam_fields::cigar_to_string_from_raw(&rr[mapped_i]);
let unmapped_is_reverse =
(bam_fields::flags(&rr[unmapped_i]) & bam_fields::flags::REVERSE) != 0;
let rr = self.raw_records.as_mut().unwrap();
bam_fields::set_ref_id(&mut rr[unmapped_i], mapped_ref_id);
bam_fields::set_pos(&mut rr[unmapped_i], mapped_pos);
bam_fields::set_mate_ref_id(&mut rr[mapped_i], mapped_ref_id);
bam_fields::set_mate_pos(&mut rr[mapped_i], mapped_pos);
set_mate_flags_raw(&mut rr[mapped_i], unmapped_is_reverse, true);
bam_fields::remove_tag(&mut rr[mapped_i], b"MQ");
bam_fields::remove_tag(&mut rr[mapped_i], b"MC");
bam_fields::set_template_length(&mut rr[mapped_i], 0);
bam_fields::set_mate_ref_id(&mut rr[unmapped_i], mapped_ref_id);
bam_fields::set_mate_pos(&mut rr[unmapped_i], mapped_pos);
set_mate_flags_raw(&mut rr[unmapped_i], mapped_is_reverse, false);
let mq_val = if mapped_mapq == 255 { 255 } else { i32::from(mapped_mapq) };
bam_fields::update_int_tag(&mut rr[unmapped_i], b"MQ", mq_val);
if !mapped_cigar_str.is_empty() && mapped_cigar_str != "*" {
bam_fields::update_string_tag(&mut rr[unmapped_i], b"MC", mapped_cigar_str.as_bytes());
} else {
bam_fields::remove_tag(&mut rr[unmapped_i], b"MC");
}
bam_fields::set_template_length(&mut rr[unmapped_i], 0);
}
fn set_mate_info_both_mapped(
&mut self,
r1_i: usize,
r2_i: usize,
mapq_tag: Tag,
mate_cigar_tag: Tag,
) -> Result<()> {
let r2_ref_id = self.records[r2_i].reference_sequence_id();
let r2_pos = self.records[r2_i].alignment_start();
let r2_is_reverse = self.records[r2_i].flags().is_reverse_complemented();
let r2_mapq = self.records[r2_i].mapping_quality();
let r2_cigar_str = cigar_to_string(self.records[r2_i].cigar())?;
let r1_ref_id = self.records[r1_i].reference_sequence_id();
let r1_pos = self.records[r1_i].alignment_start();
let r1_is_reverse = self.records[r1_i].flags().is_reverse_complemented();
let r1_mapq = self.records[r1_i].mapping_quality();
let r1_cigar_str = cigar_to_string(self.records[r1_i].cigar())?;
*self.records[r1_i].mate_reference_sequence_id_mut() = r2_ref_id;
*self.records[r1_i].mate_alignment_start_mut() = r2_pos;
set_mate_flags(&mut self.records[r1_i], r2_is_reverse, false);
let r2_mapq_value = r2_mapq.map_or(255, |m| i32::from(u8::from(m)));
self.records[r1_i].data_mut().insert(mapq_tag, to_smallest_signed_int(r2_mapq_value));
if !r2_cigar_str.is_empty() && r2_cigar_str != "*" {
self.records[r1_i].data_mut().insert(mate_cigar_tag, BufValue::from(r2_cigar_str));
}
*self.records[r2_i].mate_reference_sequence_id_mut() = r1_ref_id;
*self.records[r2_i].mate_alignment_start_mut() = r1_pos;
set_mate_flags(&mut self.records[r2_i], r1_is_reverse, false);
let r1_mapq_value = r1_mapq.map_or(255, |m| i32::from(u8::from(m)));
self.records[r2_i].data_mut().insert(mapq_tag, to_smallest_signed_int(r1_mapq_value));
if !r1_cigar_str.is_empty() && r1_cigar_str != "*" {
self.records[r2_i].data_mut().insert(mate_cigar_tag, BufValue::from(r1_cigar_str));
}
let insert_size = compute_insert_size(&self.records[r1_i], &self.records[r2_i]);
*self.records[r1_i].template_length_mut() = insert_size;
*self.records[r2_i].template_length_mut() = -insert_size;
Ok(())
}
fn set_mate_info_both_unmapped(
&mut self,
r1_i: usize,
r2_i: usize,
mapq_tag: Tag,
mate_cigar_tag: Tag,
) {
let r1_is_reverse = self.records[r1_i].flags().is_reverse_complemented();
let r2_is_reverse = self.records[r2_i].flags().is_reverse_complemented();
*self.records[r1_i].reference_sequence_id_mut() = None;
*self.records[r1_i].alignment_start_mut() = None;
*self.records[r1_i].mate_reference_sequence_id_mut() = None;
*self.records[r1_i].mate_alignment_start_mut() = None;
set_mate_flags(&mut self.records[r1_i], r2_is_reverse, true);
self.records[r1_i].data_mut().remove(&mapq_tag);
self.records[r1_i].data_mut().remove(&mate_cigar_tag);
*self.records[r1_i].template_length_mut() = 0;
*self.records[r2_i].reference_sequence_id_mut() = None;
*self.records[r2_i].alignment_start_mut() = None;
*self.records[r2_i].mate_reference_sequence_id_mut() = None;
*self.records[r2_i].mate_alignment_start_mut() = None;
set_mate_flags(&mut self.records[r2_i], r1_is_reverse, true);
self.records[r2_i].data_mut().remove(&mapq_tag);
self.records[r2_i].data_mut().remove(&mate_cigar_tag);
*self.records[r2_i].template_length_mut() = 0;
}
fn set_mate_info_one_unmapped(
&mut self,
r1_i: usize,
r2_i: usize,
r1_is_unmapped: bool,
mapq_tag: Tag,
mate_cigar_tag: Tag,
) -> Result<()> {
let (mapped_i, unmapped_i) = if r1_is_unmapped { (r2_i, r1_i) } else { (r1_i, r2_i) };
let mapped_ref_id = self.records[mapped_i].reference_sequence_id();
let mapped_pos = self.records[mapped_i].alignment_start();
let mapped_is_reverse = self.records[mapped_i].flags().is_reverse_complemented();
let mapped_mapq = self.records[mapped_i].mapping_quality();
let mapped_cigar_str = cigar_to_string(self.records[mapped_i].cigar())?;
let unmapped_is_reverse = self.records[unmapped_i].flags().is_reverse_complemented();
*self.records[unmapped_i].reference_sequence_id_mut() = mapped_ref_id;
*self.records[unmapped_i].alignment_start_mut() = mapped_pos;
*self.records[mapped_i].mate_reference_sequence_id_mut() = mapped_ref_id;
*self.records[mapped_i].mate_alignment_start_mut() = mapped_pos;
set_mate_flags(&mut self.records[mapped_i], unmapped_is_reverse, true);
self.records[mapped_i].data_mut().remove(&mapq_tag);
self.records[mapped_i].data_mut().remove(&mate_cigar_tag);
*self.records[mapped_i].template_length_mut() = 0;
*self.records[unmapped_i].mate_reference_sequence_id_mut() = mapped_ref_id;
*self.records[unmapped_i].mate_alignment_start_mut() = mapped_pos;
set_mate_flags(&mut self.records[unmapped_i], mapped_is_reverse, false);
let mapped_mapq_value = mapped_mapq.map_or(255, |m| i32::from(u8::from(m)));
self.records[unmapped_i]
.data_mut()
.insert(mapq_tag, to_smallest_signed_int(mapped_mapq_value));
if !mapped_cigar_str.is_empty() && mapped_cigar_str != "*" {
self.records[unmapped_i]
.data_mut()
.insert(mate_cigar_tag, BufValue::from(mapped_cigar_str));
}
*self.records[unmapped_i].template_length_mut() = 0;
Ok(())
}
}
fn compute_insert_size(rec1: &RecordBuf, rec2: &RecordBuf) -> i32 {
if rec1.flags().is_unmapped() || rec2.flags().is_unmapped() {
return 0;
}
if rec1.reference_sequence_id() != rec2.reference_sequence_id() {
return 0;
}
let pos1 = rec1.alignment_start().map_or(0, |p| i32::try_from(usize::from(p)).unwrap_or(0));
let pos2 = rec2.alignment_start().map_or(0, |p| i32::try_from(usize::from(p)).unwrap_or(0));
let end1 = pos1 + alignment_length(rec1.cigar()) - 1;
let end2 = pos2 + alignment_length(rec2.cigar()) - 1;
let first_5prime = if rec1.flags().is_reverse_complemented() { end1 } else { pos1 };
let second_5prime = if rec2.flags().is_reverse_complemented() { end2 } else { pos2 };
let adjustment = if second_5prime >= first_5prime { 1 } else { -1 };
second_5prime - first_5prime + adjustment
}
fn alignment_length(cigar: &noodles::sam::alignment::record_buf::Cigar) -> i32 {
use noodles::sam::alignment::record::cigar::op::Kind;
let mut len = 0i32;
for op in cigar.iter().flatten() {
match op.kind() {
Kind::Match
| Kind::Deletion
| Kind::Skip
| Kind::SequenceMatch
| Kind::SequenceMismatch => {
len += i32::try_from(op.len()).unwrap_or(0);
}
_ => {}
}
}
len
}
fn set_mate_flags_raw(record: &mut [u8], mate_is_reverse: bool, mate_is_unmapped: bool) {
use crate::sort::bam_fields;
let mut f = bam_fields::flags(record);
f &= !bam_fields::flags::MATE_REVERSE;
if mate_is_reverse {
f |= bam_fields::flags::MATE_REVERSE;
}
f &= !bam_fields::flags::MATE_UNMAPPED;
if mate_is_unmapped {
f |= bam_fields::flags::MATE_UNMAPPED;
}
bam_fields::set_flags(record, f);
}
fn compute_insert_size_raw(rec1: &[u8], rec2: &[u8]) -> i32 {
use crate::sort::bam_fields;
let f1 = bam_fields::flags(rec1);
let f2 = bam_fields::flags(rec2);
if (f1 & bam_fields::flags::UNMAPPED) != 0 || (f2 & bam_fields::flags::UNMAPPED) != 0 {
return 0;
}
if bam_fields::ref_id(rec1) != bam_fields::ref_id(rec2) {
return 0;
}
let pos1 = bam_fields::pos(rec1) + 1;
let pos2 = bam_fields::pos(rec2) + 1;
let ref_len1 = bam_fields::reference_length_from_raw_bam(rec1);
let ref_len2 = bam_fields::reference_length_from_raw_bam(rec2);
let end1 = pos1 + ref_len1 - 1;
let end2 = pos2 + ref_len2 - 1;
let first_5prime = if (f1 & bam_fields::flags::REVERSE) != 0 { end1 } else { pos1 };
let second_5prime = if (f2 & bam_fields::flags::REVERSE) != 0 { end2 } else { pos2 };
let adjustment = if second_5prime >= first_5prime { 1 } else { -1 };
second_5prime - first_5prime + adjustment
}
#[allow(clippy::cast_possible_wrap)]
fn get_pair_orientation(record: &RecordBuf) -> PairOrientation {
let is_reverse = record.flags().is_reverse_complemented();
let mate_reverse = record.flags().is_mate_reverse_complemented();
if is_reverse == mate_reverse {
return PairOrientation::Tandem;
}
let alignment_start = record.alignment_start().map_or(0, usize::from);
let mate_start = record.mate_alignment_start().map_or(0, usize::from);
let insert_size = record.template_length();
let (positive_five_prime, negative_five_prime) = if is_reverse {
let end = record_utils::alignment_end(record).unwrap_or(alignment_start);
(mate_start as i64, end as i64)
} else {
(alignment_start as i64, alignment_start as i64 + i64::from(insert_size))
};
if positive_five_prime < negative_five_prime {
PairOrientation::FR
} else {
PairOrientation::RF
}
}
fn set_mate_flags(record: &mut RecordBuf, mate_is_reverse: bool, mate_is_unmapped: bool) {
use noodles::sam::alignment::record::Flags;
let mut flags = record.flags();
flags.remove(Flags::MATE_REVERSE_COMPLEMENTED);
if mate_is_reverse {
flags.insert(Flags::MATE_REVERSE_COMPLEMENTED);
}
flags.remove(Flags::MATE_UNMAPPED);
if mate_is_unmapped {
flags.insert(Flags::MATE_UNMAPPED);
}
*record.flags_mut() = flags;
}
fn extract_int_value(value: &BufValue) -> Option<i32> {
match value {
BufValue::Int8(i) => Some(i32::from(*i)),
BufValue::Int16(i) => Some(i32::from(*i)),
BufValue::Int32(i) => Some(*i),
BufValue::UInt8(i) => Some(i32::from(*i)),
BufValue::UInt16(i) => Some(i32::from(*i)),
BufValue::UInt32(i) => i32::try_from(*i).ok(),
_ => None,
}
}
fn kind_to_char(kind: noodles::sam::alignment::record::cigar::op::Kind) -> char {
use noodles::sam::alignment::record::cigar::op::Kind;
match kind {
Kind::Match => 'M',
Kind::Insertion => 'I',
Kind::Deletion => 'D',
Kind::Skip => 'N',
Kind::SoftClip => 'S',
Kind::HardClip => 'H',
Kind::Pad => 'P',
Kind::SequenceMatch => '=',
Kind::SequenceMismatch => 'X',
}
}
fn cigar_to_string(cigar: &noodles::sam::alignment::record_buf::Cigar) -> Result<String> {
if cigar.is_empty() {
return Ok(String::from("*"));
}
let mut result = String::with_capacity(cigar.as_ref().len() * 4);
for op in cigar.iter() {
let op = op?;
let _ = write!(result, "{}{}", op.len(), kind_to_char(op.kind()));
}
Ok(result)
}
pub struct TemplateIterator<I>
where
I: Iterator<Item = Result<RecordBuf>>,
{
record_iter: I,
builder: Builder,
}
impl<I> TemplateIterator<I>
where
I: Iterator<Item = Result<RecordBuf>>,
{
pub fn new(record_iter: I) -> Self {
TemplateIterator { record_iter, builder: Builder::new() }
}
}
impl<I> Iterator for TemplateIterator<I>
where
I: Iterator<Item = Result<RecordBuf>>,
{
type Item = Result<Template>;
fn next(&mut self) -> Option<Self::Item> {
loop {
match self.record_iter.next() {
None => {
if self.builder.is_empty() {
return None;
}
return Some(self.builder.build());
}
Some(Err(e)) => {
return Some(Err(e));
}
Some(Ok(record)) => {
let name: Vec<u8> = if let Some(n) = record.name() {
Vec::from(<_ as AsRef<[u8]>>::as_ref(n))
} else {
Vec::new()
};
if self.builder.name.iter().any(|n| *n == name) || self.builder.is_empty() {
if let Err(e) = self.builder.push(record) {
return Some(Err(e));
}
} else {
let template: std::result::Result<Template, anyhow::Error> =
self.builder.build();
if let Err(e) = self.builder.push(record) {
return Some(Err(e));
}
return Some(template);
}
}
}
}
}
}
impl MemoryEstimate for Template {
fn estimate_heap_size(&self) -> usize {
let name_size = self.name.capacity();
let records_size: usize = self.records.iter().map(estimate_record_buf_heap_size).sum();
let records_vec_overhead = self.records.capacity() * std::mem::size_of::<RecordBuf>();
let raw_records_size = self.raw_records.as_ref().map_or(0, |rr| {
rr.iter().map(Vec::capacity).sum::<usize>()
+ rr.capacity() * std::mem::size_of::<Vec<u8>>()
});
name_size + records_size + records_vec_overhead + raw_records_size
}
}
impl MemoryEstimate for TemplateBatch {
fn estimate_heap_size(&self) -> usize {
self.iter().map(MemoryEstimate::estimate_heap_size).sum::<usize>()
+ self.capacity() * std::mem::size_of::<Template>()
}
}
#[must_use]
pub fn estimate_record_buf_heap_size(record: &RecordBuf) -> usize {
let name_size = record.name().map_or(0, |n| n.len());
let seq_len = record.sequence().len();
let qual_len = record.quality_scores().len();
let cigar_ops = record.cigar().as_ref().len();
let cigar_size = cigar_ops * 4;
let data_fields = record.data().iter().count();
let entry_capacity = (data_fields * 115) / 100 + 1; let entries_size = data_fields * 48; let hash_table_size = entry_capacity * 16; let value_heap_size = data_fields * 16; let data_size = entries_size + hash_table_size + value_heap_size;
name_size + seq_len + qual_len + cigar_size + data_size
}
#[cfg(test)]
#[allow(clippy::similar_names)]
mod tests {
use super::*;
use crate::sam::builder::RecordBuilder;
use noodles::sam::alignment::record::Flags;
use noodles::sam::alignment::record::cigar::Op;
use noodles::sam::alignment::record::cigar::op::Kind;
use noodles::sam::alignment::record_buf::Cigar as CigarBuf;
fn create_test_record(name: &[u8], flags: u16) -> RecordBuf {
RecordBuilder::new()
.name(std::str::from_utf8(name).expect("read name should be valid UTF-8"))
.flags(Flags::from(flags))
.build()
}
const FLAG_PAIRED: u16 = 0x1;
const FLAG_READ1: u16 = 0x40;
const FLAG_READ2: u16 = 0x80;
const FLAG_SECONDARY: u16 = 0x100;
const FLAG_SUPPLEMENTARY: u16 = 0x800;
#[test]
fn test_template_new() {
let template = Template::new(b"read1".to_vec());
assert_eq!(template.name, b"read1");
assert_eq!(template.read_count(), 0);
}
#[test]
fn test_builder_empty() {
let builder = Builder::new();
assert!(builder.is_empty());
assert_eq!(builder.len(), 0);
}
#[test]
fn test_builder_push_r1_only() -> Result<()> {
let mut builder = Builder::new();
let r1 = create_test_record(b"read1", 0);
builder.push(r1)?;
assert_eq!(builder.len(), 1);
assert!(!builder.is_empty());
Ok(())
}
#[test]
fn test_builder_push_paired_end() -> Result<()> {
let mut builder = Builder::new();
let r1 = create_test_record(b"read1", FLAG_PAIRED | FLAG_READ1);
let r2 = create_test_record(b"read1", FLAG_PAIRED | FLAG_READ2);
builder.push(r1)?;
builder.push(r2)?;
assert_eq!(builder.len(), 2);
Ok(())
}
#[test]
fn test_builder_push_supplementary() -> Result<()> {
let mut builder = Builder::new();
let supp = create_test_record(b"read1", FLAG_SUPPLEMENTARY);
builder.push(supp)?;
assert_eq!(builder.len(), 1);
Ok(())
}
#[test]
fn test_builder_push_secondary() -> Result<()> {
let mut builder = Builder::new();
let sec = create_test_record(b"read1", FLAG_SECONDARY);
builder.push(sec)?;
assert_eq!(builder.len(), 1);
Ok(())
}
#[test]
fn test_builder_error_on_name_mismatch() {
let mut builder = Builder::new();
let r1 = create_test_record(b"read1", 0);
let r2 = create_test_record(b"read2", 0);
builder.push(r1).expect("failed to push record to builder");
let result = builder.push(r2);
assert!(result.is_err());
assert!(result.unwrap_err().to_string().contains("mismatch"));
}
#[test]
fn test_builder_error_on_multiple_r1() {
let mut builder = Builder::new();
let r1a = create_test_record(b"read1", 0);
let r1b = create_test_record(b"read1", 0);
builder.push(r1a).expect("failed to push record to builder");
let result = builder.push(r1b);
assert!(result.is_err());
assert!(result.unwrap_err().to_string().contains("Multiple non-secondary"));
}
#[test]
fn test_builder_error_on_multiple_r2() {
let mut builder = Builder::new();
let r2a = create_test_record(b"read1", FLAG_PAIRED | FLAG_READ2);
let r2b = create_test_record(b"read1", FLAG_PAIRED | FLAG_READ2);
builder.push(r2a).expect("failed to push record to builder");
let result = builder.push(r2b);
assert!(result.is_err());
assert!(result.unwrap_err().to_string().contains("Multiple non-secondary"));
}
#[test]
fn test_builder_build_empty_error() {
let mut builder = Builder::new();
let result = builder.build();
assert!(result.is_err());
assert!(result.unwrap_err().to_string().contains("No records"));
}
#[test]
fn test_builder_build_success() -> Result<()> {
let mut builder = Builder::new();
let r1 = create_test_record(b"read1", 0);
builder.push(r1)?;
let template = builder.build()?;
assert_eq!(template.name, b"read1");
assert_eq!(template.read_count(), 1);
assert!(template.r1().is_some());
Ok(())
}
#[test]
fn test_template_from_records() -> Result<()> {
let records = vec![
create_test_record(b"read1", FLAG_PAIRED | FLAG_READ1),
create_test_record(b"read1", FLAG_PAIRED | FLAG_READ2),
];
let template = Template::from_records(records)?;
assert_eq!(template.read_count(), 2);
assert!(template.r1().is_some());
assert!(template.r2().is_some());
Ok(())
}
#[test]
fn test_template_primary_reads() -> Result<()> {
let mut builder = Builder::new();
builder.push(create_test_record(b"read1", FLAG_PAIRED | FLAG_READ1))?;
builder.push(create_test_record(b"read1", FLAG_PAIRED | FLAG_READ2))?;
let template = builder.build()?;
let primaries: Vec<_> = template.primary_reads().collect();
assert_eq!(primaries.len(), 2);
Ok(())
}
#[test]
fn test_template_into_primary_reads() -> Result<()> {
let records = vec![
create_test_record(b"read1", FLAG_PAIRED | FLAG_READ1),
create_test_record(b"read1", FLAG_PAIRED | FLAG_READ2),
];
let template = Template::from_records(records)?;
assert!(template.r1().is_some());
assert!(template.r2().is_some());
let (r1, r2) = template.into_primary_reads();
assert!(r1.is_some());
assert!(r2.is_some());
let r1 = r1.expect("R1 record should be present");
let r2 = r2.expect("R2 record should be present");
assert!(r1.flags().is_first_segment());
assert!(r2.flags().is_last_segment());
Ok(())
}
#[test]
fn test_template_into_primary_reads_r1_only() -> Result<()> {
let records = vec![create_test_record(b"read1", FLAG_PAIRED | FLAG_READ1)];
let template = Template::from_records(records)?;
let (r1, r2) = template.into_primary_reads();
assert!(r1.is_some());
assert!(r2.is_none());
Ok(())
}
#[test]
fn test_template_all_reads() -> Result<()> {
let mut builder = Builder::new();
builder.push(create_test_record(b"read1", 0))?;
builder.push(create_test_record(b"read1", FLAG_SUPPLEMENTARY))?;
builder.push(create_test_record(b"read1", FLAG_SECONDARY))?;
let template = builder.build()?;
let all: Vec<_> = template.all_reads().collect();
assert_eq!(all.len(), 3);
Ok(())
}
#[test]
fn test_template_all_r1s() -> Result<()> {
let mut builder = Builder::new();
builder.push(create_test_record(b"read1", FLAG_PAIRED | FLAG_READ1))?;
builder
.push(create_test_record(b"read1", FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY))?;
let template = builder.build()?;
let all_r1s: Vec<_> = template.all_r1s().collect();
assert_eq!(all_r1s.len(), 2);
Ok(())
}
#[test]
fn test_template_all_r2s() -> Result<()> {
let mut builder = Builder::new();
builder.push(create_test_record(b"read1", FLAG_PAIRED | FLAG_READ2))?;
builder.push(create_test_record(b"read1", FLAG_PAIRED | FLAG_READ2 | FLAG_SECONDARY))?;
let template = builder.build()?;
let all_r2s: Vec<_> = template.all_r2s().collect();
assert_eq!(all_r2s.len(), 2);
Ok(())
}
#[test]
fn test_template_supplementals_and_secondaries() -> Result<()> {
let mut builder = Builder::new();
builder.push(create_test_record(b"read1", 0))?;
builder.push(create_test_record(b"read1", FLAG_SUPPLEMENTARY))?;
builder.push(create_test_record(b"read1", FLAG_SECONDARY))?;
let template = builder.build()?;
let non_primary: Vec<_> = template.all_supplementary_and_secondary().collect();
assert_eq!(non_primary.len(), 2);
Ok(())
}
#[test]
fn test_cigar_to_string() {
use noodles::sam::alignment::record_buf::Cigar;
let cigar = Cigar::default();
assert_eq!(cigar_to_string(&cigar).expect("cigar_to_string should succeed"), "*");
}
#[test]
fn test_cigar_to_string_with_ops() {
let ops =
vec![Op::new(Kind::Match, 10), Op::new(Kind::Deletion, 2), Op::new(Kind::Match, 5)];
let cigar = CigarBuf::from(ops);
let result = cigar_to_string(&cigar).expect("cigar_to_string should succeed");
assert_eq!(result, "10M2D5M");
}
#[test]
fn test_template_builder_reuse() -> Result<()> {
let mut builder = Builder::new();
builder.push(create_test_record(b"read1", 0))?;
let template1 = builder.build()?;
assert_eq!(template1.name, b"read1");
builder.push(create_test_record(b"read2", 0))?;
let template2 = builder.build()?;
assert_eq!(template2.name, b"read2");
Ok(())
}
#[test]
fn test_template_r1_supplementals() -> Result<()> {
let mut builder = Builder::new();
builder.push(create_test_record(b"read1", FLAG_PAIRED | FLAG_READ1))?;
builder
.push(create_test_record(b"read1", FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY))?;
builder
.push(create_test_record(b"read1", FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY))?;
let template = builder.build()?;
let supps = template.r1_supplementals();
assert_eq!(supps.len(), 2);
Ok(())
}
#[test]
fn test_template_r2_supplementals() -> Result<()> {
let mut builder = Builder::new();
builder.push(create_test_record(b"read1", FLAG_PAIRED | FLAG_READ2))?;
builder
.push(create_test_record(b"read1", FLAG_PAIRED | FLAG_READ2 | FLAG_SUPPLEMENTARY))?;
let template = builder.build()?;
let supps = template.r2_supplementals();
assert_eq!(supps.len(), 1);
Ok(())
}
#[test]
fn test_template_r1_secondaries() -> Result<()> {
let mut builder = Builder::new();
builder.push(create_test_record(b"read1", 0))?;
builder.push(create_test_record(b"read1", FLAG_SECONDARY))?;
let template = builder.build()?;
let secs = template.r1_secondaries();
assert_eq!(secs.len(), 1);
Ok(())
}
#[test]
fn test_template_r2_secondaries() -> Result<()> {
let mut builder = Builder::new();
builder.push(create_test_record(b"read1", FLAG_PAIRED | FLAG_READ2))?;
builder.push(create_test_record(b"read1", FLAG_PAIRED | FLAG_READ2 | FLAG_SECONDARY))?;
builder.push(create_test_record(b"read1", FLAG_PAIRED | FLAG_READ2 | FLAG_SECONDARY))?;
let template = builder.build()?;
let secs = template.r2_secondaries();
assert_eq!(secs.len(), 2);
Ok(())
}
#[test]
fn test_extract_int_value_int8() {
let value = BufValue::Int8(42);
assert_eq!(extract_int_value(&value), Some(42));
}
#[test]
fn test_extract_int_value_int16() {
let value = BufValue::Int16(1234);
assert_eq!(extract_int_value(&value), Some(1234));
}
#[test]
fn test_extract_int_value_int32() {
let value = BufValue::Int32(123_456);
assert_eq!(extract_int_value(&value), Some(123_456));
}
#[test]
fn test_extract_int_value_uint8() {
let value = BufValue::UInt8(255);
assert_eq!(extract_int_value(&value), Some(255));
}
#[test]
fn test_extract_int_value_uint16() {
let value = BufValue::UInt16(65535);
assert_eq!(extract_int_value(&value), Some(65535));
}
#[test]
fn test_extract_int_value_uint32() {
let value = BufValue::UInt32(100_000);
assert_eq!(extract_int_value(&value), Some(100_000));
}
#[test]
fn test_extract_int_value_uint32_overflow() {
let value = BufValue::UInt32(u32::MAX);
assert_eq!(extract_int_value(&value), None);
}
#[test]
fn test_extract_int_value_string_returns_none() {
let value = BufValue::String("test".into());
assert_eq!(extract_int_value(&value), None);
}
#[test]
fn test_extract_int_value_negative() {
let value = BufValue::Int8(-42);
assert_eq!(extract_int_value(&value), Some(-42));
let value = BufValue::Int16(-1234);
assert_eq!(extract_int_value(&value), Some(-1234));
let value = BufValue::Int32(-123_456);
assert_eq!(extract_int_value(&value), Some(-123_456));
}
fn create_mapped_record(name: &[u8], flags: u16, pos: usize, mapq: u8) -> RecordBuf {
create_mapped_record_with_tlen(name, flags, pos, mapq, 0)
}
fn create_mapped_record_with_tlen(
name: &[u8],
flags: u16,
pos: usize,
mapq: u8,
tlen: i32,
) -> RecordBuf {
let is_read1 = (flags & FLAG_READ1) != 0;
let is_paired = (flags & FLAG_PAIRED) != 0;
let mut builder = RecordBuilder::new()
.name(std::str::from_utf8(name).expect("read name should be valid UTF-8"))
.sequence(&"A".repeat(100))
.qualities(&[30u8; 100])
.cigar("100M")
.reference_sequence_id(0)
.alignment_start(pos)
.mapping_quality(mapq)
.template_length(tlen);
if is_paired {
builder = builder.first_segment(is_read1);
}
let mut record = builder.build();
*record.flags_mut() = Flags::from(flags);
record
}
#[test]
fn test_fix_mate_info_sets_ms_tag_with_int8_as() -> Result<()> {
let as_tag = Tag::from(SamTag::AS);
let ms_tag = Tag::from(SamTag::MS);
let mut r1 = create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ1, 100, 30);
r1.data_mut().insert(as_tag, BufValue::Int8(55));
let mut r2 = create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ2, 200, 40);
r2.data_mut().insert(as_tag, BufValue::Int8(44));
let mut builder = Builder::new();
builder.push(r1)?;
builder.push(r2)?;
let mut template = builder.build()?;
template.fix_mate_info()?;
let r1_ms = template.records[0].data().get(&ms_tag);
assert!(r1_ms.is_some(), "R1 should have ms tag");
assert_eq!(extract_int_value(r1_ms.expect("R1 ms tag should be present")), Some(44));
let r2_ms = template.records[1].data().get(&ms_tag);
assert!(r2_ms.is_some(), "R2 should have ms tag");
assert_eq!(extract_int_value(r2_ms.expect("R2 ms tag should be present")), Some(55));
Ok(())
}
#[test]
fn test_fix_mate_info_sets_ms_tag_with_uint8_as() -> Result<()> {
let as_tag = Tag::from(SamTag::AS);
let ms_tag = Tag::from(SamTag::MS);
let mut r1 = create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ1, 100, 30);
r1.data_mut().insert(as_tag, BufValue::UInt8(77));
let mut r2 = create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ2, 200, 40);
r2.data_mut().insert(as_tag, BufValue::UInt8(88));
let mut builder = Builder::new();
builder.push(r1)?;
builder.push(r2)?;
let mut template = builder.build()?;
template.fix_mate_info()?;
let r1_ms = template.records[0].data().get(&ms_tag);
assert!(r1_ms.is_some(), "R1 should have ms tag");
assert_eq!(extract_int_value(r1_ms.expect("R1 ms tag should be present")), Some(88));
let r2_ms = template.records[1].data().get(&ms_tag);
assert!(r2_ms.is_some(), "R2 should have ms tag");
assert_eq!(extract_int_value(r2_ms.expect("R2 ms tag should be present")), Some(77));
Ok(())
}
#[test]
fn test_fix_mate_info_sets_ms_tag_with_int16_as() -> Result<()> {
let as_tag = Tag::from(SamTag::AS);
let ms_tag = Tag::from(SamTag::MS);
let mut r1 = create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ1, 100, 30);
r1.data_mut().insert(as_tag, BufValue::Int16(1000));
let mut r2 = create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ2, 200, 40);
r2.data_mut().insert(as_tag, BufValue::Int16(2000));
let mut builder = Builder::new();
builder.push(r1)?;
builder.push(r2)?;
let mut template = builder.build()?;
template.fix_mate_info()?;
let r1_ms = template.records[0].data().get(&ms_tag);
assert!(r1_ms.is_some(), "R1 should have ms tag");
assert_eq!(extract_int_value(r1_ms.expect("R1 ms tag should be present")), Some(2000));
let r2_ms = template.records[1].data().get(&ms_tag);
assert!(r2_ms.is_some(), "R2 should have ms tag");
assert_eq!(extract_int_value(r2_ms.expect("R2 ms tag should be present")), Some(1000));
Ok(())
}
#[test]
fn test_fix_mate_info_sets_ms_tag_with_int32_as() -> Result<()> {
let as_tag = Tag::from(SamTag::AS);
let ms_tag = Tag::from(SamTag::MS);
let mut r1 = create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ1, 100, 30);
r1.data_mut().insert(as_tag, BufValue::Int32(100_000));
let mut r2 = create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ2, 200, 40);
r2.data_mut().insert(as_tag, BufValue::Int32(200_000));
let mut builder = Builder::new();
builder.push(r1)?;
builder.push(r2)?;
let mut template = builder.build()?;
template.fix_mate_info()?;
let r1_ms = template.records[0].data().get(&ms_tag);
assert!(r1_ms.is_some(), "R1 should have ms tag");
assert_eq!(extract_int_value(r1_ms.expect("R1 ms tag should be present")), Some(200_000));
let r2_ms = template.records[1].data().get(&ms_tag);
assert!(r2_ms.is_some(), "R2 should have ms tag");
assert_eq!(extract_int_value(r2_ms.expect("R2 ms tag should be present")), Some(100_000));
Ok(())
}
#[test]
fn test_fix_mate_info_no_ms_tag_without_as() -> Result<()> {
let ms_tag = Tag::from(SamTag::MS);
let r1 = create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ1, 100, 30);
let r2 = create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ2, 200, 40);
let mut builder = Builder::new();
builder.push(r1)?;
builder.push(r2)?;
let mut template = builder.build()?;
template.fix_mate_info()?;
assert!(template.records[0].data().get(&ms_tag).is_none(), "R1 should not have ms tag");
assert!(template.records[1].data().get(&ms_tag).is_none(), "R2 should not have ms tag");
Ok(())
}
#[test]
fn test_fix_mate_info_sets_ms_tag_on_supplementals() -> Result<()> {
let as_tag = Tag::from(SamTag::AS);
let ms_tag = Tag::from(SamTag::MS);
let mut r1 = create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ1, 100, 30);
r1.data_mut().insert(as_tag, BufValue::Int8(55));
let mut r2 = create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ2, 200, 40);
r2.data_mut().insert(as_tag, BufValue::Int8(44));
let mut r1_supp =
create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY, 300, 20);
r1_supp.data_mut().insert(as_tag, BufValue::Int8(33));
let mut builder = Builder::new();
builder.push(r1)?;
builder.push(r2)?;
builder.push(r1_supp)?;
let mut template = builder.build()?;
template.fix_mate_info()?;
let r1_supp_ms = template.records[2].data().get(&ms_tag);
assert!(r1_supp_ms.is_some(), "R1 supplementary should have ms tag");
assert_eq!(
extract_int_value(r1_supp_ms.expect("R1 supplementary ms tag should be present")),
Some(44)
);
Ok(())
}
#[test]
fn test_fix_mate_info_sets_tlen_on_r1_supplementals() -> Result<()> {
let r1 = create_mapped_record_with_tlen(b"read1", FLAG_PAIRED | FLAG_READ1, 100, 30, 200);
let r2 = create_mapped_record_with_tlen(b"read1", FLAG_PAIRED | FLAG_READ2, 200, 40, -200);
let r1_supp = create_mapped_record_with_tlen(
b"read1",
FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY,
300,
20,
0,
);
let mut builder = Builder::new();
builder.push(r1)?;
builder.push(r2)?;
builder.push(r1_supp)?;
let mut template = builder.build()?;
template.fix_mate_info()?;
assert_eq!(
template.records[2].template_length(),
101,
"R1 supplementary TLEN should be negative of R2's TLEN"
);
Ok(())
}
#[test]
fn test_fix_mate_info_sets_tlen_on_r2_supplementals() -> Result<()> {
let r1 = create_mapped_record_with_tlen(b"read1", FLAG_PAIRED | FLAG_READ1, 100, 30, 300);
let r2 = create_mapped_record_with_tlen(b"read1", FLAG_PAIRED | FLAG_READ2, 200, 40, -300);
let r2_supp = create_mapped_record_with_tlen(
b"read1",
FLAG_PAIRED | FLAG_READ2 | FLAG_SUPPLEMENTARY,
400,
25,
0,
);
let mut builder = Builder::new();
builder.push(r1)?;
builder.push(r2)?;
builder.push(r2_supp)?;
let mut template = builder.build()?;
template.fix_mate_info()?;
assert_eq!(
template.records[2].template_length(),
-101,
"R2 supplementary TLEN should be negative of R1's TLEN"
);
Ok(())
}
#[test]
fn test_fix_mate_info_sets_tlen_on_multiple_supplementals() -> Result<()> {
let r1 = create_mapped_record_with_tlen(b"read1", FLAG_PAIRED | FLAG_READ1, 100, 30, 500);
let r2 = create_mapped_record_with_tlen(b"read1", FLAG_PAIRED | FLAG_READ2, 200, 40, -500);
let r1_supp1 = create_mapped_record_with_tlen(
b"read1",
FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY,
300,
20,
0,
);
let r1_supp2 = create_mapped_record_with_tlen(
b"read1",
FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY,
400,
15,
0,
);
let r2_supp1 = create_mapped_record_with_tlen(
b"read1",
FLAG_PAIRED | FLAG_READ2 | FLAG_SUPPLEMENTARY,
500,
25,
0,
);
let r2_supp2 = create_mapped_record_with_tlen(
b"read1",
FLAG_PAIRED | FLAG_READ2 | FLAG_SUPPLEMENTARY,
600,
10,
0,
);
let mut builder = Builder::new();
builder.push(r1)?;
builder.push(r2)?;
builder.push(r1_supp1)?;
builder.push(r1_supp2)?;
builder.push(r2_supp1)?;
builder.push(r2_supp2)?;
let mut template = builder.build()?;
template.fix_mate_info()?;
assert_eq!(template.records[2].template_length(), 101, "R1 supp1 TLEN");
assert_eq!(template.records[3].template_length(), 101, "R1 supp2 TLEN");
assert_eq!(template.records[4].template_length(), -101, "R2 supp1 TLEN");
assert_eq!(template.records[5].template_length(), -101, "R2 supp2 TLEN");
Ok(())
}
#[test]
fn test_fix_mate_info_tlen_recalculated() -> Result<()> {
let r1 = create_mapped_record_with_tlen(b"read1", FLAG_PAIRED | FLAG_READ1, 100, 30, 0);
let r2 = create_mapped_record_with_tlen(b"read1", FLAG_PAIRED | FLAG_READ2, 200, 40, 0);
let r1_supp = create_mapped_record_with_tlen(
b"read1",
FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY,
300,
20,
999, );
let mut builder = Builder::new();
builder.push(r1)?;
builder.push(r2)?;
builder.push(r1_supp)?;
let mut template = builder.build()?;
template.fix_mate_info()?;
assert_eq!(
template.records[2].template_length(),
101,
"R1 supplementary TLEN should be recalculated from positions"
);
Ok(())
}
#[test]
fn test_fix_mate_info_no_mate_primary() -> Result<()> {
let r1 = create_mapped_record_with_tlen(b"read1", FLAG_PAIRED | FLAG_READ1, 100, 30, 200);
let r1_supp = create_mapped_record_with_tlen(
b"read1",
FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY,
300,
20,
777, );
let mut builder = Builder::new();
builder.push(r1)?;
builder.push(r1_supp)?;
let mut template = builder.build()?;
template.fix_mate_info()?;
assert_eq!(
template.records[1].template_length(),
777,
"R1 supplementary TLEN should be unchanged without R2 primary"
);
Ok(())
}
#[test]
fn test_template_record_ordering() -> Result<()> {
let r1 = create_test_record(b"read1", FLAG_PAIRED | FLAG_READ1);
let r2 = create_test_record(b"read1", FLAG_PAIRED | FLAG_READ2);
let r1_supp1 = create_test_record(b"read1", FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY);
let r1_supp2 = create_test_record(b"read1", FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY);
let r2_supp = create_test_record(b"read1", FLAG_PAIRED | FLAG_READ2 | FLAG_SUPPLEMENTARY);
let r1_sec = create_test_record(b"read1", FLAG_PAIRED | FLAG_READ1 | FLAG_SECONDARY);
let r2_sec1 = create_test_record(b"read1", FLAG_PAIRED | FLAG_READ2 | FLAG_SECONDARY);
let r2_sec2 = create_test_record(b"read1", FLAG_PAIRED | FLAG_READ2 | FLAG_SECONDARY);
let mut builder = Builder::new();
builder.push(r2_sec1)?;
builder.push(r1_supp2)?;
builder.push(r1)?;
builder.push(r2_supp)?;
builder.push(r1_sec)?;
builder.push(r2)?;
builder.push(r2_sec2)?;
builder.push(r1_supp1)?;
let template = builder.build()?;
assert_eq!(template.r1, Some((0, 1)), "R1 should be at index 0");
assert_eq!(template.r2, Some((1, 2)), "R2 should be at index 1");
assert_eq!(template.r1_supplementals, Some((2, 4)), "R1 supps should be at indices 2-3");
assert_eq!(template.r2_supplementals, Some((4, 5)), "R2 supps should be at index 4");
assert_eq!(template.r1_secondaries, Some((5, 6)), "R1 secs should be at index 5");
assert_eq!(template.r2_secondaries, Some((6, 8)), "R2 secs should be at indices 6-7");
let all_reads: Vec<_> = template.all_reads().collect();
assert_eq!(all_reads.len(), 8);
assert!(
all_reads[0].flags().is_first_segment() && !all_reads[0].flags().is_supplementary()
);
assert!(all_reads[1].flags().is_last_segment() && !all_reads[1].flags().is_supplementary());
assert!(all_reads[2].flags().is_first_segment() && all_reads[2].flags().is_supplementary());
assert!(all_reads[3].flags().is_first_segment() && all_reads[3].flags().is_supplementary());
assert!(all_reads[4].flags().is_last_segment() && all_reads[4].flags().is_supplementary());
assert!(all_reads[5].flags().is_first_segment() && all_reads[5].flags().is_secondary());
assert!(all_reads[6].flags().is_last_segment() && all_reads[6].flags().is_secondary());
assert!(all_reads[7].flags().is_last_segment() && all_reads[7].flags().is_secondary());
Ok(())
}
#[test]
fn test_record_ordering_within_groups_is_reversed() -> Result<()> {
let r1 = create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ1, 100, 30);
let r2 = create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ2, 200, 30);
let r1_supp1 =
create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY, 1000, 20);
let r1_supp2 =
create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY, 2000, 20);
let r1_supp3 =
create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY, 3000, 20);
let r2_supp1 =
create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ2 | FLAG_SUPPLEMENTARY, 4000, 20);
let r2_supp2 =
create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ2 | FLAG_SUPPLEMENTARY, 5000, 20);
let r1_sec1 =
create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ1 | FLAG_SECONDARY, 6000, 10);
let r1_sec2 =
create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ1 | FLAG_SECONDARY, 7000, 10);
let mut builder = Builder::new();
builder.push(r1)?;
builder.push(r2)?;
builder.push(r1_supp1)?;
builder.push(r1_supp2)?;
builder.push(r1_supp3)?;
builder.push(r2_supp1)?;
builder.push(r2_supp2)?;
builder.push(r1_sec1)?;
builder.push(r1_sec2)?;
let template = builder.build()?;
let get_pos = |rec: &RecordBuf| -> usize { rec.alignment_start().map_or(0, |p| p.get()) };
assert_eq!(get_pos(&template.records[0]), 100, "R1 primary");
assert_eq!(get_pos(&template.records[1]), 200, "R2 primary");
assert_eq!(get_pos(&template.records[2]), 3000, "R1 supp should be in reverse order");
assert_eq!(get_pos(&template.records[3]), 2000, "R1 supp should be in reverse order");
assert_eq!(get_pos(&template.records[4]), 1000, "R1 supp should be in reverse order");
assert_eq!(get_pos(&template.records[5]), 5000, "R2 supp should be in reverse order");
assert_eq!(get_pos(&template.records[6]), 4000, "R2 supp should be in reverse order");
assert_eq!(get_pos(&template.records[7]), 7000, "R1 sec should be in reverse order");
assert_eq!(get_pos(&template.records[8]), 6000, "R1 sec should be in reverse order");
Ok(())
}
fn create_mapped_record_with_flags(
name: &[u8],
flags: u16,
pos: usize,
mapq: u8,
tlen: i32,
mate_ref_id: Option<usize>,
mate_pos: Option<usize>,
) -> RecordBuf {
let is_read1 = (flags & FLAG_READ1) != 0;
let is_paired = (flags & FLAG_PAIRED) != 0;
let mut builder = RecordBuilder::new()
.name(std::str::from_utf8(name).expect("read name should be valid UTF-8"))
.sequence(&"A".repeat(100))
.qualities(&[30u8; 100])
.cigar("100M")
.reference_sequence_id(0)
.alignment_start(pos)
.mapping_quality(mapq)
.template_length(tlen);
if is_paired {
builder = builder.first_segment(is_read1);
}
if let Some(mate_ref) = mate_ref_id {
builder = builder.mate_reference_sequence_id(mate_ref);
}
if let Some(mate_p) = mate_pos {
builder = builder.mate_alignment_start(mate_p);
}
let mut record = builder.build();
*record.flags_mut() = Flags::from(flags);
record
}
const FLAG_REVERSE: u16 = 0x10;
const FLAG_MATE_REVERSE: u16 = 0x20;
const FLAG_UNMAPPED: u16 = 0x4;
const FLAG_MATE_UNMAPPED: u16 = 0x8;
#[test]
fn test_fix_mate_info_sets_full_mate_info_on_r1_supplementals() -> Result<()> {
let mq_tag = Tag::from(SamTag::MQ);
let mc_tag = Tag::from(SamTag::MC);
let as_tag = Tag::from(SamTag::AS);
let ms_tag = Tag::from(SamTag::MS);
let mut r1 = create_mapped_record_with_flags(
b"read1",
FLAG_PAIRED | FLAG_READ1,
100,
30,
200,
Some(0),
Some(200),
);
r1.data_mut().insert(as_tag, BufValue::Int32(100));
let mut r2 = create_mapped_record_with_flags(
b"read1",
FLAG_PAIRED | FLAG_READ2 | FLAG_REVERSE,
200,
40,
-200,
Some(0),
Some(100),
);
r2.data_mut().insert(as_tag, BufValue::Int32(150));
let r1_supp = create_mapped_record_with_flags(
b"read1",
FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY | FLAG_MATE_REVERSE, 500,
25,
0, Some(0),
Some(500), );
let mut builder = Builder::new();
builder.push(r1)?;
builder.push(r2)?;
builder.push(r1_supp)?;
let mut template = builder.build()?;
template.fix_mate_info()?;
let supp = &template.records[2];
assert_eq!(
supp.mate_alignment_start().map(|p| p.get()),
Some(200),
"R1 supp should have mate pos from R2"
);
assert_eq!(
supp.mate_reference_sequence_id(),
Some(0),
"R1 supp should have mate ref from R2"
);
assert!(
supp.flags().is_mate_reverse_complemented(),
"R1 supp should have mate_reverse set since R2 is reverse"
);
assert!(
!supp.flags().is_mate_unmapped(),
"R1 supp should NOT have mate_unmapped since R2 is mapped"
);
assert_eq!(supp.template_length(), 200, "R1 supp TLEN should be -(-200) = 200");
let mq_value = supp.data().get(&mq_tag).and_then(extract_int_value);
assert_eq!(mq_value, Some(40), "R1 supp MQ should be R2's mapq");
let mc_value = supp.data().get(&mc_tag);
assert!(mc_value.is_some(), "R1 supp should have MC tag");
let ms_value = supp.data().get(&ms_tag).and_then(extract_int_value);
assert_eq!(ms_value, Some(150), "R1 supp ms should be R2's AS value");
Ok(())
}
#[test]
fn test_fix_mate_info_sets_full_mate_info_on_r2_supplementals() -> Result<()> {
let mq_tag = Tag::from(SamTag::MQ);
let mc_tag = Tag::from(SamTag::MC);
let as_tag = Tag::from(SamTag::AS);
let ms_tag = Tag::from(SamTag::MS);
let mut r1 = create_mapped_record_with_flags(
b"read1",
FLAG_PAIRED | FLAG_READ1,
100,
35,
300,
Some(0),
Some(200),
);
r1.data_mut().insert(as_tag, BufValue::Int32(120));
let mut r2 = create_mapped_record_with_flags(
b"read1",
FLAG_PAIRED | FLAG_READ2,
200,
45,
-300,
Some(0),
Some(100),
);
r2.data_mut().insert(as_tag, BufValue::Int32(180));
let r2_supp = create_mapped_record_with_flags(
b"read1",
FLAG_PAIRED | FLAG_READ2 | FLAG_SUPPLEMENTARY,
600,
20,
0, Some(0),
Some(600), );
let mut builder = Builder::new();
builder.push(r1)?;
builder.push(r2)?;
builder.push(r2_supp)?;
let mut template = builder.build()?;
template.fix_mate_info()?;
let supp = &template.records[2];
assert_eq!(
supp.mate_alignment_start().map(|p| p.get()),
Some(100),
"R2 supp should have mate pos from R1"
);
assert!(
!supp.flags().is_mate_reverse_complemented(),
"R2 supp should NOT have mate_reverse since R1 is forward"
);
assert_eq!(supp.template_length(), -101, "R2 supp TLEN should be -(101) = -101");
let mq_value = supp.data().get(&mq_tag).and_then(extract_int_value);
assert_eq!(mq_value, Some(35), "R2 supp MQ should be R1's mapq");
assert!(supp.data().get(&mc_tag).is_some(), "R2 supp should have MC tag");
let ms_value = supp.data().get(&ms_tag).and_then(extract_int_value);
assert_eq!(ms_value, Some(120), "R2 supp ms should be R1's AS value");
Ok(())
}
#[test]
fn test_fix_mate_info_supplemental_with_unmapped_mate() -> Result<()> {
let mc_tag = Tag::from(SamTag::MC);
let r1 = create_mapped_record_with_flags(
b"read1",
FLAG_PAIRED | FLAG_READ1 | FLAG_MATE_UNMAPPED, 100,
30,
0, Some(0),
Some(100), );
let mut r2 = create_mapped_record_with_flags(
b"read1",
FLAG_PAIRED | FLAG_READ2 | FLAG_UNMAPPED,
100, 0, 0, Some(0),
Some(100),
);
*r2.cigar_mut() = CigarBuf::default();
let r1_supp = create_mapped_record_with_flags(
b"read1",
FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY | FLAG_MATE_REVERSE, 500,
25,
999, Some(0),
Some(500), );
let mut builder = Builder::new();
builder.push(r1)?;
builder.push(r2)?;
builder.push(r1_supp)?;
let mut template = builder.build()?;
template.fix_mate_info()?;
let supp = &template.records[2];
assert!(
supp.flags().is_mate_unmapped(),
"R1 supp should have mate_unmapped since R2 is unmapped"
);
assert!(
!supp.flags().is_mate_reverse_complemented(),
"R1 supp should NOT have mate_reverse when R2 is unmapped (unmapped reads have no orientation)"
);
assert!(
supp.data().get(&mc_tag).is_none(),
"R1 supp should NOT have MC tag when mate is unmapped"
);
assert_eq!(supp.template_length(), 0, "TLEN should be 0 when mate is unmapped");
Ok(())
}
#[test]
fn test_fix_mate_info_clears_incorrect_mate_reverse_flag() -> Result<()> {
let r1 = create_mapped_record_with_flags(
b"read1",
FLAG_PAIRED | FLAG_READ1,
100,
30,
200,
Some(0),
Some(200),
);
let r2 = create_mapped_record_with_flags(
b"read1",
FLAG_PAIRED | FLAG_READ2, 200,
40,
-200,
Some(0),
Some(100),
);
let r1_supp = create_mapped_record_with_flags(
b"read1",
FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY | FLAG_MATE_REVERSE, 500,
25,
0,
Some(0),
Some(500),
);
let mut builder = Builder::new();
builder.push(r1)?;
builder.push(r2)?;
builder.push(r1_supp)?;
let mut template = builder.build()?;
assert!(
template.records[2].flags().is_mate_reverse_complemented(),
"Before fix: supp incorrectly has mate_reverse"
);
template.fix_mate_info()?;
assert!(
!template.records[2].flags().is_mate_reverse_complemented(),
"After fix: supp should NOT have mate_reverse since R2 is forward"
);
Ok(())
}
#[test]
fn test_fix_mate_info_multiple_supplementals() -> Result<()> {
let as_tag = Tag::from(SamTag::AS);
let r1 = create_mapped_record_with_flags(
b"read1",
FLAG_PAIRED | FLAG_READ1,
100,
30,
400,
Some(0),
Some(300),
);
let mut r2 = create_mapped_record_with_flags(
b"read1",
FLAG_PAIRED | FLAG_READ2 | FLAG_REVERSE,
300,
50,
-400,
Some(0),
Some(100),
);
r2.data_mut().insert(as_tag, BufValue::Int32(200));
let r1_supp1 = create_mapped_record_with_flags(
b"read1",
FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY,
500,
20,
0,
Some(0),
Some(500),
);
let r1_supp2 = create_mapped_record_with_flags(
b"read1",
FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY,
700,
15,
0,
Some(0),
Some(700),
);
let mut builder = Builder::new();
builder.push(r1)?;
builder.push(r2)?;
builder.push(r1_supp1)?;
builder.push(r1_supp2)?;
let mut template = builder.build()?;
template.fix_mate_info()?;
for i in 2..=3 {
let supp = &template.records[i];
assert_eq!(
supp.mate_alignment_start().map(|p| p.get()),
Some(300),
"Supp {i} should have mate pos 300"
);
assert!(
supp.flags().is_mate_reverse_complemented(),
"Supp {i} should have mate_reverse"
);
assert_eq!(supp.template_length(), 300, "Supp {i} TLEN should be 300");
}
Ok(())
}
fn create_insert_size_record(
name: &[u8],
flags: u16,
pos: usize,
read_length: usize,
ref_id: Option<usize>,
) -> RecordBuf {
let is_read1 = (flags & FLAG_READ1) != 0;
let is_paired = (flags & FLAG_PAIRED) != 0;
let mut builder = RecordBuilder::new()
.name(std::str::from_utf8(name).expect("read name should be valid UTF-8"))
.sequence(&"A".repeat(read_length))
.qualities(&vec![30u8; read_length])
.cigar(&format!("{read_length}M"))
.alignment_start(pos)
.mapping_quality(30);
if let Some(rid) = ref_id {
builder = builder.reference_sequence_id(rid);
}
if is_paired {
builder = builder.first_segment(is_read1);
}
let mut record = builder.build();
*record.flags_mut() = Flags::from(flags);
record
}
#[test]
fn test_compute_insert_size_normal_innie() {
let r1 = create_insert_size_record(b"read1", FLAG_PAIRED | FLAG_READ1, 1, 100, Some(0));
let r2 = create_insert_size_record(
b"read1",
FLAG_PAIRED | FLAG_READ2 | FLAG_REVERSE,
500,
100,
Some(0),
);
let insert_size = compute_insert_size(&r1, &r2);
assert_eq!(insert_size, 599);
}
#[test]
fn test_compute_insert_size_overlapping_innie() {
let r1 = create_insert_size_record(b"read1", FLAG_PAIRED | FLAG_READ1, 1, 100, Some(0));
let r2 = create_insert_size_record(
b"read1",
FLAG_PAIRED | FLAG_READ2 | FLAG_REVERSE,
50,
100,
Some(0),
);
let insert_size = compute_insert_size(&r1, &r2);
assert_eq!(insert_size, 149);
}
#[test]
fn test_compute_insert_size_completely_overlapping_innie() {
let r1 = create_insert_size_record(b"read1", FLAG_PAIRED | FLAG_READ1, 1, 100, Some(0));
let r2 = create_insert_size_record(
b"read1",
FLAG_PAIRED | FLAG_READ2 | FLAG_REVERSE,
1,
100,
Some(0),
);
let insert_size = compute_insert_size(&r1, &r2);
assert_eq!(insert_size, 100);
}
#[test]
fn test_compute_insert_size_normal_outie() {
let r1 = create_insert_size_record(
b"read1",
FLAG_PAIRED | FLAG_READ1 | FLAG_REVERSE,
1,
100,
Some(0),
);
let r2 = create_insert_size_record(b"read1", FLAG_PAIRED | FLAG_READ2, 500, 100, Some(0));
let insert_size = compute_insert_size(&r1, &r2);
assert_eq!(insert_size, 401);
}
#[test]
fn test_compute_insert_size_forward_tandem() {
let r1 = create_insert_size_record(
b"read1",
FLAG_PAIRED | FLAG_READ1 | FLAG_REVERSE,
1,
100,
Some(0),
);
let r2 = create_insert_size_record(
b"read1",
FLAG_PAIRED | FLAG_READ2 | FLAG_REVERSE,
500,
100,
Some(0),
);
let insert_size = compute_insert_size(&r1, &r2);
assert_eq!(insert_size, 500);
}
#[test]
fn test_compute_insert_size_reverse_tandem() {
let r1 = create_insert_size_record(b"read1", FLAG_PAIRED | FLAG_READ1, 1, 100, Some(0));
let r2 = create_insert_size_record(b"read1", FLAG_PAIRED | FLAG_READ2, 500, 100, Some(0));
let insert_size = compute_insert_size(&r1, &r2);
assert_eq!(insert_size, 500);
}
#[test]
fn test_compute_insert_size_negative() {
let r1 = create_insert_size_record(b"read1", FLAG_PAIRED | FLAG_READ1, 500, 100, Some(0));
let r2 = create_insert_size_record(
b"read1",
FLAG_PAIRED | FLAG_READ2 | FLAG_REVERSE,
1,
100,
Some(0),
);
let insert_size = compute_insert_size(&r1, &r2);
assert_eq!(insert_size, -401);
}
#[test]
fn test_compute_insert_size_different_references() {
let r1 = create_insert_size_record(b"read1", FLAG_PAIRED | FLAG_READ1, 100, 100, Some(0));
let r2 = create_insert_size_record(
b"read1",
FLAG_PAIRED | FLAG_READ2 | FLAG_REVERSE,
200,
100,
Some(1), );
let insert_size = compute_insert_size(&r1, &r2);
assert_eq!(insert_size, 0, "Insert size should be 0 for different references");
}
#[test]
fn test_compute_insert_size_r1_unmapped() {
let r1 = create_insert_size_record(
b"read1",
FLAG_PAIRED | FLAG_READ1 | FLAG_UNMAPPED,
100,
100,
Some(0),
);
let r2 = create_insert_size_record(
b"read1",
FLAG_PAIRED | FLAG_READ2 | FLAG_REVERSE,
200,
100,
Some(0),
);
let insert_size = compute_insert_size(&r1, &r2);
assert_eq!(insert_size, 0, "Insert size should be 0 when R1 is unmapped");
}
#[test]
fn test_compute_insert_size_r2_unmapped() {
let r1 = create_insert_size_record(b"read1", FLAG_PAIRED | FLAG_READ1, 100, 100, Some(0));
let r2 = create_insert_size_record(
b"read1",
FLAG_PAIRED | FLAG_READ2 | FLAG_UNMAPPED,
200,
100,
Some(0),
);
let insert_size = compute_insert_size(&r1, &r2);
assert_eq!(insert_size, 0, "Insert size should be 0 when R2 is unmapped");
}
#[test]
fn test_compute_insert_size_both_unmapped() {
let r1 = create_insert_size_record(
b"read1",
FLAG_PAIRED | FLAG_READ1 | FLAG_UNMAPPED,
100,
100,
Some(0),
);
let r2 = create_insert_size_record(
b"read1",
FLAG_PAIRED | FLAG_READ2 | FLAG_UNMAPPED,
200,
100,
Some(0),
);
let insert_size = compute_insert_size(&r1, &r2);
assert_eq!(insert_size, 0, "Insert size should be 0 when both reads are unmapped");
}
#[test]
fn test_compute_insert_size_with_indels() {
let r1 = RecordBuilder::new()
.name("read1")
.sequence(&"A".repeat(100))
.qualities(&[30u8; 100])
.reference_sequence_id(0)
.alignment_start(1)
.cigar("50M10I40M")
.first_segment(true)
.build();
let mut r2 = RecordBuilder::new()
.name("read1")
.sequence(&"A".repeat(100))
.qualities(&[30u8; 100])
.reference_sequence_id(0)
.alignment_start(200)
.cigar("50M5D50M")
.first_segment(false)
.reverse_complement(true)
.build();
*r2.flags_mut() = Flags::from(FLAG_PAIRED | FLAG_READ2 | FLAG_REVERSE);
let insert_size = compute_insert_size(&r1, &r2);
assert_eq!(insert_size, 304);
}
#[test]
fn test_fix_mate_info_sets_tlen_normal_innie() -> Result<()> {
let r1 = create_insert_size_record(b"read1", FLAG_PAIRED | FLAG_READ1, 1, 100, Some(0));
let r2 = create_insert_size_record(
b"read1",
FLAG_PAIRED | FLAG_READ2 | FLAG_REVERSE,
500,
100,
Some(0),
);
let mut builder = Builder::new();
builder.push(r1)?;
builder.push(r2)?;
let mut template = builder.build()?;
template.fix_mate_info()?;
assert_eq!(template.records[0].template_length(), 599, "R1 TLEN");
assert_eq!(template.records[1].template_length(), -599, "R2 TLEN");
Ok(())
}
#[test]
fn test_fix_mate_info_sets_mate_cigar() -> Result<()> {
let mc_tag = Tag::from(SamTag::MC);
let r1 = RecordBuilder::new()
.name("read1")
.sequence(&"A".repeat(50))
.qualities(&[30u8; 50])
.reference_sequence_id(0)
.alignment_start(1)
.cigar("50M")
.first_segment(true)
.build();
let mut r2 = RecordBuilder::new()
.name("read1")
.sequence(&"A".repeat(50))
.qualities(&[30u8; 50])
.reference_sequence_id(0)
.alignment_start(500)
.cigar("25M5I20M")
.first_segment(false)
.reverse_complement(true)
.build();
*r2.flags_mut() = Flags::from(FLAG_PAIRED | FLAG_READ2 | FLAG_REVERSE);
let mut builder = Builder::new();
builder.push(r1)?;
builder.push(r2)?;
let mut template = builder.build()?;
template.fix_mate_info()?;
let r1_mc = template.records[0].data().get(&mc_tag);
assert!(r1_mc.is_some(), "R1 should have MC tag");
let Some(BufValue::String(mc)) = r1_mc else {
unreachable!("MC tag should be a string");
};
assert_eq!(&mc[..], b"25M5I20M", "R1 MC should be R2's CIGAR");
let r2_mc = template.records[1].data().get(&mc_tag);
assert!(r2_mc.is_some(), "R2 should have MC tag");
let Some(BufValue::String(mc)) = r2_mc else {
unreachable!("MC tag should be a string");
};
assert_eq!(&mc[..], b"50M", "R2 MC should be R1's CIGAR");
Ok(())
}
#[test]
fn test_fix_mate_info_both_unmapped() -> Result<()> {
let mq_tag = Tag::from(SamTag::MQ);
let mc_tag = Tag::from(SamTag::MC);
let mut r1 = create_test_record(b"read1", FLAG_PAIRED | FLAG_READ1 | FLAG_UNMAPPED);
*r1.reference_sequence_id_mut() = Some(0); *r1.data_mut() = [(mq_tag, BufValue::Int32(30))].into_iter().collect();
let mut r2 = create_test_record(b"read1", FLAG_PAIRED | FLAG_READ2 | FLAG_UNMAPPED);
*r2.reference_sequence_id_mut() = Some(0);
*r2.data_mut() = [(mc_tag, BufValue::String("100M".into()))].into_iter().collect();
let mut builder = Builder::new();
builder.push(r1)?;
builder.push(r2)?;
let mut template = builder.build()?;
template.fix_mate_info()?;
assert!(template.records[0].reference_sequence_id().is_none(), "R1 ref should be cleared");
assert!(template.records[1].reference_sequence_id().is_none(), "R2 ref should be cleared");
assert!(template.records[0].flags().is_mate_unmapped(), "R1 mate_unmapped");
assert!(template.records[1].flags().is_mate_unmapped(), "R2 mate_unmapped");
assert!(template.records[0].data().get(&mq_tag).is_none(), "R1 MQ should be removed");
assert!(template.records[1].data().get(&mc_tag).is_none(), "R2 MC should be removed");
assert_eq!(template.records[0].template_length(), 0, "R1 TLEN should be 0");
assert_eq!(template.records[1].template_length(), 0, "R2 TLEN should be 0");
Ok(())
}
#[test]
fn test_fix_mate_info_one_unmapped() -> Result<()> {
let mq_tag = Tag::from(SamTag::MQ);
let mc_tag = Tag::from(SamTag::MC);
let r1 = create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ1, 100, 30);
let r2 = create_test_record(b"read1", FLAG_PAIRED | FLAG_READ2 | FLAG_UNMAPPED);
let mut builder = Builder::new();
builder.push(r1)?;
builder.push(r2)?;
let mut template = builder.build()?;
template.fix_mate_info()?;
assert_eq!(
template.records[1].reference_sequence_id(),
Some(0),
"R2 should be placed at R1's reference"
);
assert_eq!(
template.records[1].alignment_start().map(|p| p.get()),
Some(100),
"R2 should be placed at R1's position"
);
assert!(
template.records[0].data().get(&mq_tag).is_none(),
"R1 should NOT have MQ when mate is unmapped"
);
assert!(
template.records[1].data().get(&mq_tag).is_some(),
"R2 should have MQ when mate is mapped"
);
assert!(template.records[0].flags().is_mate_unmapped(), "R1 mate_unmapped should be set");
assert!(
!template.records[1].flags().is_mate_unmapped(),
"R2 mate_unmapped should NOT be set"
);
assert_eq!(template.records[0].template_length(), 0, "R1 TLEN");
assert_eq!(template.records[1].template_length(), 0, "R2 TLEN");
assert!(
template.records[0].data().get(&mc_tag).is_none(),
"R1 should NOT have MC when mate is unmapped"
);
assert!(
template.records[1].data().get(&mc_tag).is_some(),
"R2 should have MC when mate is mapped"
);
Ok(())
}
#[test]
fn test_pair_orientation_fr_pair() -> Result<()> {
let r1 = create_mapped_record_with_flags(
b"read1",
FLAG_PAIRED | FLAG_READ1 | FLAG_MATE_REVERSE,
100,
30,
200, Some(0),
Some(200),
);
let r2 = create_mapped_record_with_flags(
b"read1",
FLAG_PAIRED | FLAG_READ2 | FLAG_REVERSE,
200,
30,
-200,
Some(0),
Some(100),
);
let template = Template::from_records(vec![r1, r2])?;
assert_eq!(template.pair_orientation(), Some(PairOrientation::FR));
Ok(())
}
#[test]
fn test_pair_orientation_rf_pair() -> Result<()> {
let r1 = create_mapped_record_with_flags(
b"read1",
FLAG_PAIRED | FLAG_READ1 | FLAG_MATE_REVERSE,
300,
30,
-200, Some(0),
Some(100),
);
let r2 = create_mapped_record_with_flags(
b"read1",
FLAG_PAIRED | FLAG_READ2 | FLAG_REVERSE,
100,
30,
200,
Some(0),
Some(300),
);
let template = Template::from_records(vec![r1, r2])?;
assert_eq!(template.pair_orientation(), Some(PairOrientation::RF));
Ok(())
}
#[test]
fn test_pair_orientation_tandem_both_forward() -> Result<()> {
let r1 = create_mapped_record_with_flags(
b"read1",
FLAG_PAIRED | FLAG_READ1, 100,
30,
100,
Some(0),
Some(200),
);
let r2 = create_mapped_record_with_flags(
b"read1",
FLAG_PAIRED | FLAG_READ2, 200,
30,
-100,
Some(0),
Some(100),
);
let template = Template::from_records(vec![r1, r2])?;
assert_eq!(template.pair_orientation(), Some(PairOrientation::Tandem));
Ok(())
}
#[test]
fn test_pair_orientation_tandem_both_reverse() -> Result<()> {
let r1 = create_mapped_record_with_flags(
b"read1",
FLAG_PAIRED | FLAG_READ1 | FLAG_REVERSE | FLAG_MATE_REVERSE,
100,
30,
100,
Some(0),
Some(200),
);
let r2 = create_mapped_record_with_flags(
b"read1",
FLAG_PAIRED | FLAG_READ2 | FLAG_REVERSE | FLAG_MATE_REVERSE,
200,
30,
-100,
Some(0),
Some(100),
);
let template = Template::from_records(vec![r1, r2])?;
assert_eq!(template.pair_orientation(), Some(PairOrientation::Tandem));
Ok(())
}
#[test]
fn test_pair_orientation_no_r1() -> Result<()> {
let r2 = create_mapped_record_with_flags(
b"read1",
FLAG_PAIRED | FLAG_READ2 | FLAG_REVERSE,
100,
30,
0,
Some(0),
Some(100),
);
let template = Template::from_records(vec![r2])?;
assert_eq!(template.pair_orientation(), None);
Ok(())
}
#[test]
fn test_pair_orientation_no_r2() -> Result<()> {
let r1 = create_mapped_record_with_flags(
b"read1",
FLAG_PAIRED | FLAG_READ1,
100,
30,
0,
Some(0),
Some(100),
);
let template = Template::from_records(vec![r1])?;
assert_eq!(template.pair_orientation(), None);
Ok(())
}
#[test]
fn test_pair_orientation_r1_unmapped() -> Result<()> {
let r1 = create_mapped_record_with_flags(
b"read1",
FLAG_PAIRED | FLAG_READ1 | FLAG_UNMAPPED,
100,
30,
0,
Some(0),
Some(100),
);
let r2 = create_mapped_record_with_flags(
b"read1",
FLAG_PAIRED | FLAG_READ2,
100,
30,
0,
Some(0),
Some(100),
);
let template = Template::from_records(vec![r1, r2])?;
assert_eq!(template.pair_orientation(), None);
Ok(())
}
#[test]
fn test_pair_orientation_r2_unmapped() -> Result<()> {
let r1 = create_mapped_record_with_flags(
b"read1",
FLAG_PAIRED | FLAG_READ1,
100,
30,
0,
Some(0),
Some(100),
);
let r2 = create_mapped_record_with_flags(
b"read1",
FLAG_PAIRED | FLAG_READ2 | FLAG_UNMAPPED,
100,
30,
0,
Some(0),
Some(100),
);
let template = Template::from_records(vec![r1, r2])?;
assert_eq!(template.pair_orientation(), None);
Ok(())
}
#[test]
fn test_pair_orientation_different_chromosomes() -> Result<()> {
let mut r1 = create_mapped_record_with_flags(
b"read1",
FLAG_PAIRED | FLAG_READ1 | FLAG_MATE_REVERSE,
100,
30,
200,
Some(1), Some(200),
);
*r1.reference_sequence_id_mut() = Some(0);
let mut r2 = create_mapped_record_with_flags(
b"read1",
FLAG_PAIRED | FLAG_READ2 | FLAG_REVERSE,
200,
30,
-200,
Some(0),
Some(100),
);
*r2.reference_sequence_id_mut() = Some(1);
let template = Template::from_records(vec![r1, r2])?;
assert_eq!(template.pair_orientation(), None);
Ok(())
}
#[test]
fn test_pair_orientation_fr_from_reverse_read() -> Result<()> {
let r1 = create_mapped_record_with_flags(
b"read1",
FLAG_PAIRED | FLAG_READ1 | FLAG_REVERSE, 200,
30,
-200, Some(0),
Some(100),
);
let r2 = create_mapped_record_with_flags(
b"read1",
FLAG_PAIRED | FLAG_READ2 | FLAG_MATE_REVERSE, 100,
30,
200,
Some(0),
Some(200),
);
let template = Template::from_records(vec![r1, r2])?;
assert_eq!(template.pair_orientation(), Some(PairOrientation::FR));
Ok(())
}
#[allow(clippy::cast_possible_truncation)]
fn make_minimal_raw_bam(name: &[u8], flags: u16) -> Vec<u8> {
let l_read_name = (name.len() + 1) as u8; let total = 32 + l_read_name as usize; let mut buf = vec![0u8; total];
buf[8] = l_read_name;
buf[14..16].copy_from_slice(&flags.to_le_bytes());
let name_start = 32;
buf[name_start..name_start + name.len()].copy_from_slice(name);
buf[name_start + name.len()] = 0;
buf
}
#[test]
fn test_from_raw_records_creates_raw_mode_template() {
let raw = make_minimal_raw_bam(b"read1", FLAG_PAIRED | FLAG_READ1);
let template =
Template::from_raw_records(vec![raw]).expect("from_raw_records should succeed");
assert!(template.is_raw_byte_mode());
assert!(template.records.is_empty());
assert!(template.raw_r1().is_some());
}
#[test]
fn test_r1_accessor_returns_none_in_raw_mode() {
let raw = make_minimal_raw_bam(b"read1", FLAG_PAIRED | FLAG_READ1);
let template =
Template::from_raw_records(vec![raw]).expect("from_raw_records should succeed");
assert!(template.r1().is_none());
assert!(template.raw_r1().is_some());
}
#[test]
fn test_r2_accessor_returns_none_in_raw_mode() {
let r1 = make_minimal_raw_bam(b"read1", FLAG_PAIRED | FLAG_READ1);
let r2 = make_minimal_raw_bam(b"read1", FLAG_PAIRED | FLAG_READ2);
let template =
Template::from_raw_records(vec![r1, r2]).expect("from_raw_records should succeed");
assert!(template.is_raw_byte_mode());
assert!(template.r2().is_none());
assert!(template.raw_r2().is_some());
}
#[test]
fn test_from_raw_records_fast_path_normal_order() {
let r1 = make_minimal_raw_bam(b"read1", FLAG_PAIRED | FLAG_READ1);
let r2 = make_minimal_raw_bam(b"read1", FLAG_PAIRED | FLAG_READ2);
let template =
Template::from_raw_records(vec![r1, r2]).expect("from_raw_records should succeed");
assert!(template.is_raw_byte_mode());
assert!(template.raw_r1().is_some());
assert!(template.raw_r2().is_some());
let raw_r1 = template.raw_r1().expect("raw_r1 should be present");
let r1_flags = crate::sort::bam_fields::flags(raw_r1);
assert_ne!(r1_flags & FLAG_READ1, 0);
}
#[test]
fn test_from_raw_records_fast_path_swap() {
let r1 = make_minimal_raw_bam(b"read1", FLAG_PAIRED | FLAG_READ1);
let r2 = make_minimal_raw_bam(b"read1", FLAG_PAIRED | FLAG_READ2);
let template = Template::from_raw_records(vec![r2, r1]).expect("from_raw_records");
assert!(template.is_raw_byte_mode());
let raw_r1 = template.raw_r1().expect("raw_r1 should be present");
let r1_flags = crate::sort::bam_fields::flags(raw_r1);
assert_ne!(r1_flags & FLAG_READ1, 0);
let raw_r2 = template.raw_r2().expect("raw_r2 should be present");
let r2_flags = crate::sort::bam_fields::flags(raw_r2);
assert_ne!(r2_flags & FLAG_READ2, 0);
}
#[test]
fn test_from_raw_records_fast_path_fallthrough_both_r1() {
let r1a = make_minimal_raw_bam(b"read1", FLAG_PAIRED | FLAG_READ1);
let r1b = make_minimal_raw_bam(b"read1", FLAG_PAIRED | FLAG_READ1);
let result = Template::from_raw_records(vec![r1a, r1b]);
assert!(result.is_err());
}
#[test]
fn test_from_raw_records_fast_path_with_secondary() {
let r1 = make_minimal_raw_bam(b"read1", FLAG_PAIRED | FLAG_READ1);
let sec = make_minimal_raw_bam(
b"read1",
FLAG_PAIRED | FLAG_READ1 | crate::sort::bam_fields::flags::SECONDARY,
);
let template =
Template::from_raw_records(vec![r1, sec]).expect("from_raw_records should succeed");
assert!(template.is_raw_byte_mode());
assert!(template.raw_r1().is_some());
}
#[test]
fn test_from_raw_records_with_supplementary() {
let r1 = make_minimal_raw_bam(b"read1", FLAG_PAIRED | FLAG_READ1);
let r1_supp = make_minimal_raw_bam(
b"read1",
FLAG_PAIRED | FLAG_READ1 | crate::sort::bam_fields::flags::SUPPLEMENTARY,
);
let r2 = make_minimal_raw_bam(b"read1", FLAG_PAIRED | FLAG_READ2);
let template =
Template::from_raw_records(vec![r1, r1_supp, r2]).expect("value should be present");
assert!(template.is_raw_byte_mode());
assert!(template.raw_r1().is_some());
assert!(template.raw_r2().is_some());
}
#[test]
fn test_from_raw_records_single_unpaired() {
let r = make_minimal_raw_bam(b"read1", 0); let template =
Template::from_raw_records(vec![r]).expect("from_raw_records should succeed");
assert!(template.is_raw_byte_mode());
assert!(template.raw_r1().is_some());
assert!(template.raw_r2().is_none());
}
#[test]
fn test_from_raw_records_empty() {
let result = Template::from_raw_records(vec![]);
assert!(result.is_err());
}
#[test]
fn test_from_raw_records_name_mismatch() {
let r1 = make_minimal_raw_bam(b"read1", FLAG_PAIRED | FLAG_READ1);
let r1_supp = make_minimal_raw_bam(
b"read1",
FLAG_PAIRED | FLAG_READ1 | crate::sort::bam_fields::flags::SUPPLEMENTARY,
);
let r2_wrong = make_minimal_raw_bam(b"read2", FLAG_PAIRED | FLAG_READ2);
let result = Template::from_raw_records(vec![r1, r1_supp, r2_wrong]);
assert!(result.is_err());
}
#[test]
fn test_from_raw_records_all_raw_records_mut() {
let r1 = make_minimal_raw_bam(b"read1", FLAG_PAIRED | FLAG_READ1);
let r2 = make_minimal_raw_bam(b"read1", FLAG_PAIRED | FLAG_READ2);
let mut template =
Template::from_raw_records(vec![r1, r2]).expect("from_raw_records should succeed");
let recs = template.all_raw_records_mut().expect("all_raw_records_mut should succeed");
assert_eq!(recs.len(), 2);
}
#[test]
fn test_from_raw_records_into_raw_records() {
let r1 = make_minimal_raw_bam(b"read1", FLAG_PAIRED | FLAG_READ1);
let r2 = make_minimal_raw_bam(b"read1", FLAG_PAIRED | FLAG_READ2);
let template =
Template::from_raw_records(vec![r1, r2]).expect("from_raw_records should succeed");
let recs = template.into_raw_records().expect("into_raw_records should succeed");
assert_eq!(recs.len(), 2);
}
#[test]
fn test_from_raw_records_truncated_header() {
let short = vec![0u8; 20];
let err = Template::from_raw_records(vec![short]).unwrap_err();
assert!(err.to_string().contains("too short"), "Error: {err}");
}
#[test]
fn test_from_raw_records_truncated_read_name() {
let mut buf = vec![0u8; 34]; buf[8] = 10; let err = Template::from_raw_records(vec![buf]).unwrap_err();
assert!(err.to_string().contains("truncated"), "Error: {err}");
}
#[test]
fn test_from_raw_records_valid_l_read_name() {
let rec = make_minimal_raw_bam(b"test", FLAG_PAIRED | FLAG_READ1);
assert!(Template::from_raw_records(vec![rec]).is_ok());
}
#[test]
fn test_write_with_offset_none() {
let mi = MoleculeId::None;
let mut buf = String::new();
let result = mi.write_with_offset(100, &mut buf);
assert!(result.is_empty());
}
#[test]
fn test_write_with_offset_single() {
let mi = MoleculeId::Single(5);
let mut buf = String::new();
let result = mi.write_with_offset(100, &mut buf);
assert_eq!(result, b"105");
}
#[test]
fn test_write_with_offset_paired_a() {
let mi = MoleculeId::PairedA(3);
let mut buf = String::new();
let result = mi.write_with_offset(10, &mut buf);
assert_eq!(result, b"13/A");
}
#[test]
fn test_write_with_offset_paired_b() {
let mi = MoleculeId::PairedB(3);
let mut buf = String::new();
let result = mi.write_with_offset(10, &mut buf);
assert_eq!(result, b"13/B");
}
#[test]
fn test_write_with_offset_reuses_buffer() {
let mut buf = String::new();
let mi1 = MoleculeId::Single(1);
let _ = mi1.write_with_offset(0, &mut buf);
assert_eq!(buf, "1");
let mi2 = MoleculeId::PairedA(99);
let result = mi2.write_with_offset(0, &mut buf);
assert_eq!(result, b"99/A");
assert_eq!(buf, "99/A");
}
#[test]
fn test_write_with_offset_matches_to_string() {
let mut buf = String::new();
for mi in [
MoleculeId::None,
MoleculeId::Single(0),
MoleculeId::Single(42),
MoleculeId::PairedA(7),
MoleculeId::PairedB(7),
] {
let expected = mi.to_string_with_offset(100);
let result = mi.write_with_offset(100, &mut buf);
assert_eq!(result, expected.as_bytes(), "Mismatch for {mi:?}");
}
}
#[test]
fn test_records_in_range_invalid_start_gt_end() {
let template = Template {
name: b"read1".to_vec(),
records: vec![
create_test_record(b"read1", FLAG_PAIRED | FLAG_READ1),
create_test_record(b"read1", FLAG_PAIRED | FLAG_READ2),
],
raw_records: None,
r1: Some((0, 1)),
r2: Some((1, 2)),
r1_supplementals: None,
r2_supplementals: None,
r1_secondaries: None,
r2_secondaries: None,
mi: MoleculeId::None,
};
assert!(template.records_in_range(Some((2, 1))).is_empty());
assert_eq!(template.records_in_range(Some((0, 2))).len(), 2);
assert!(template.records_in_range(None).is_empty());
assert!(template.records_in_range(Some((0, 10))).is_empty());
}
}