use crate::unified_pipeline::MemoryEstimate;
use anyhow::{Result, bail};
use fgumi_raw_bam::{RawRecord, RawRecordView};
pub use crate::sam::PairOrientation;
pub use fgumi_umi::MoleculeId;
#[derive(Debug, Clone)]
pub struct Template {
pub name: Vec<u8>,
pub records: Vec<RawRecord>,
pub r1: Option<(usize, usize)>,
pub r2: Option<(usize, usize)>,
pub r1_supplementals: Option<(usize, usize)>,
pub r2_supplementals: Option<(usize, usize)>,
pub r1_secondaries: Option<(usize, usize)>,
pub r2_secondaries: Option<(usize, usize)>,
pub mi: MoleculeId,
}
pub type TemplateBatch = Vec<Template>;
impl Template {
#[must_use]
pub fn new(name: Vec<u8>) -> Self {
Template {
name,
records: Vec::new(),
r1: None,
r2: None,
r1_supplementals: None,
r2_supplementals: None,
r1_secondaries: None,
r2_secondaries: None,
mi: MoleculeId::None,
}
}
#[must_use]
pub fn r1(&self) -> Option<&RawRecord> {
self.r1.map(|_| &self.records[0])
}
#[must_use]
pub fn r2(&self) -> Option<&RawRecord> {
self.r2.map(|(i, _)| &self.records[i])
}
#[must_use]
pub fn records(&self) -> &[RawRecord] {
&self.records
}
pub fn records_mut(&mut self) -> &mut [RawRecord] {
&mut self.records
}
#[must_use]
pub fn into_records(self) -> Vec<RawRecord> {
self.records
}
pub fn primary_reads(&self) -> impl Iterator<Item = &RawRecord> {
let records = &self.records;
[self.r1.map(|(s, _)| s), self.r2.map(|(s, _)| s)]
.into_iter()
.flatten()
.filter_map(move |start| records.get(start))
}
#[must_use]
pub fn read_count(&self) -> usize {
self.records.len()
}
#[allow(clippy::too_many_lines)]
pub fn from_records(mut raw_records: Vec<RawRecord>) -> Result<Self> {
use crate::sort::bam_fields;
if raw_records.is_empty() {
bail!("No records given to Template::from_records");
}
for (i, r) in raw_records.iter().enumerate() {
if r.len() < bam_fields::MIN_BAM_RECORD_LEN {
bail!(
"Raw BAM record {i} too short to parse ({} < {})",
r.len(),
bam_fields::MIN_BAM_RECORD_LEN
);
}
let l_rn = r[8] as usize;
if r.len() < 32 + l_rn {
bail!(
"Raw BAM record {i} truncated: l_read_name={l_rn} but only {} bytes after header",
r.len() - 32
);
}
}
let name = bam_fields::read_name(&raw_records[0]).to_vec();
for rec in raw_records.iter().skip(1) {
if bam_fields::read_name(rec) != name.as_slice() {
bail!("Template name mismatch in from_raw_records");
}
}
if raw_records.len() == 2 {
let f0 = RawRecordView::new(&raw_records[0]).flags();
let f1 = RawRecordView::new(&raw_records[1]).flags();
let neither_sec_supp =
(f0 | f1) & (bam_fields::flags::SECONDARY | bam_fields::flags::SUPPLEMENTARY) == 0;
if neither_sec_supp {
let is_r1_0 = (f0 & bam_fields::flags::PAIRED) == 0
|| (f0 & bam_fields::flags::FIRST_SEGMENT) != 0;
let is_r1_1 = (f1 & bam_fields::flags::PAIRED) == 0
|| (f1 & bam_fields::flags::FIRST_SEGMENT) != 0;
if is_r1_0 != is_r1_1 {
if !is_r1_0 {
raw_records.swap(0, 1);
}
return Ok(Template {
name,
records: raw_records,
r1: Some((0, 1)),
r2: Some((1, 2)),
r1_supplementals: None,
r2_supplementals: None,
r1_secondaries: None,
r2_secondaries: None,
mi: MoleculeId::None,
});
}
}
}
for rec in raw_records.iter().skip(1) {
if bam_fields::read_name(rec) != name.as_slice() {
bail!("Template name mismatch in from_records");
}
}
let mut r1_idx: Option<usize> = None;
let mut r2_idx: Option<usize> = None;
let mut r1_supp: Vec<usize> = Vec::new();
let mut r2_supp: Vec<usize> = Vec::new();
let mut r1_sec: Vec<usize> = Vec::new();
let mut r2_sec: Vec<usize> = Vec::new();
for (i, rec) in raw_records.iter().enumerate() {
let flg = RawRecordView::new(rec).flags();
let is_secondary = (flg & bam_fields::flags::SECONDARY) != 0;
let is_supplementary = (flg & bam_fields::flags::SUPPLEMENTARY) != 0;
let is_paired = (flg & bam_fields::flags::PAIRED) != 0;
let is_first = (flg & bam_fields::flags::FIRST_SEGMENT) != 0;
let is_r1 = !is_paired || is_first;
if is_r1 {
if is_secondary {
r1_sec.push(i);
} else if is_supplementary {
r1_supp.push(i);
} else if r1_idx.is_some() {
bail!(
"Multiple non-secondary, non-supplemental R1 records for read '{name:?}'"
);
} else {
r1_idx = Some(i);
}
} else if is_secondary {
r2_sec.push(i);
} else if is_supplementary {
r2_supp.push(i);
} else if r2_idx.is_some() {
bail!("Multiple non-secondary, non-supplemental R2 records for read '{name:?}'");
} else {
r2_idx = Some(i);
}
}
let mut ordered: Vec<RawRecord> = Vec::with_capacity(raw_records.len());
let mut take = |idx: usize| -> RawRecord { std::mem::take(&mut raw_records[idx]) };
let r1_pair = if let Some(idx) = r1_idx {
ordered.push(take(idx));
Some((ordered.len() - 1, ordered.len()))
} else {
None
};
let r2_pair = if let Some(idx) = r2_idx {
ordered.push(take(idx));
Some((ordered.len() - 1, ordered.len()))
} else {
None
};
let r1_supp_pair = if r1_supp.is_empty() {
None
} else {
r1_supp.reverse();
let start = ordered.len();
for idx in &r1_supp {
ordered.push(take(*idx));
}
Some((start, ordered.len()))
};
let r2_supp_pair = if r2_supp.is_empty() {
None
} else {
r2_supp.reverse();
let start = ordered.len();
for idx in &r2_supp {
ordered.push(take(*idx));
}
Some((start, ordered.len()))
};
let r1_sec_pair = if r1_sec.is_empty() {
None
} else {
r1_sec.reverse();
let start = ordered.len();
for idx in &r1_sec {
ordered.push(take(*idx));
}
Some((start, ordered.len()))
};
let r2_sec_pair = if r2_sec.is_empty() {
None
} else {
r2_sec.reverse();
let start = ordered.len();
for idx in &r2_sec {
ordered.push(take(*idx));
}
Some((start, ordered.len()))
};
Ok(Template {
name,
records: ordered,
r1: r1_pair,
r2: r2_pair,
r1_supplementals: r1_supp_pair,
r2_supplementals: r2_supp_pair,
r1_secondaries: r1_sec_pair,
r2_secondaries: r2_sec_pair,
mi: MoleculeId::None,
})
}
#[must_use]
pub fn pair_orientation(&self) -> Option<PairOrientation> {
use crate::sort::bam_fields;
let r1 = self.r1()?;
let r2 = self.r2()?;
let f1 = RawRecordView::new(r1).flags();
let f2 = RawRecordView::new(r2).flags();
if (f1 & bam_fields::flags::UNMAPPED) != 0 || (f2 & bam_fields::flags::UNMAPPED) != 0 {
return None;
}
if bam_fields::ref_id(r1) != bam_fields::ref_id(r2) {
return None;
}
Some(get_pair_orientation_raw(r1))
}
#[allow(clippy::too_many_lines)]
pub fn fix_mate_info(&mut self) -> Result<()> {
use crate::sort::bam_fields;
let rr = &mut self.records;
if rr.is_empty() {
return Ok(());
}
if let (Some((r1_i, _)), Some((r2_i, _))) = (self.r1, self.r2) {
let r1_is_unmapped =
(RawRecordView::new(&rr[r1_i]).flags() & bam_fields::flags::UNMAPPED) != 0;
let r2_is_unmapped =
(RawRecordView::new(&rr[r2_i]).flags() & bam_fields::flags::UNMAPPED) != 0;
let r1_as = bam_fields::find_int_tag(bam_fields::aux_data_slice(&rr[r1_i]), b"AS");
let r2_as = bam_fields::find_int_tag(bam_fields::aux_data_slice(&rr[r2_i]), b"AS");
if !r1_is_unmapped && !r2_is_unmapped {
self.set_mate_info_both_mapped(r1_i, r2_i);
} else if r1_is_unmapped && r2_is_unmapped {
self.set_mate_info_both_unmapped(r1_i, r2_i);
} else {
self.set_mate_info_one_unmapped(r1_i, r2_i, r1_is_unmapped);
}
let rr = &mut self.records;
if let Some(as_value) = r2_as {
if let Ok(v) = i32::try_from(as_value) {
bam_fields::remove_tag(rr[r1_i].as_mut_vec(), b"ms");
bam_fields::append_signed_int_tag(rr[r1_i].as_mut_vec(), b"ms", v);
}
}
if let Some(as_value) = r1_as {
if let Ok(v) = i32::try_from(as_value) {
bam_fields::remove_tag(rr[r2_i].as_mut_vec(), b"ms");
bam_fields::append_signed_int_tag(rr[r2_i].as_mut_vec(), b"ms", v);
}
}
}
if let Some((r2_i, _)) = self.r2 {
let rr = &self.records;
let r2_ref_id = bam_fields::ref_id(&rr[r2_i]);
let r2_pos = bam_fields::pos(&rr[r2_i]);
let r2_flags = RawRecordView::new(&rr[r2_i]).flags();
let r2_is_reverse = (r2_flags & bam_fields::flags::REVERSE) != 0;
let r2_is_unmapped = (r2_flags & bam_fields::flags::UNMAPPED) != 0;
let r2_tlen = bam_fields::template_length(&rr[r2_i]);
let r2_mapq = bam_fields::mapq(&rr[r2_i]);
let r2_cigar_str = bam_fields::cigar_to_string_from_raw(&rr[r2_i]);
let r2_as = bam_fields::find_int_tag(bam_fields::aux_data_slice(&rr[r2_i]), b"AS");
if let Some((start, end)) = self.r1_supplementals {
let rr = &mut self.records;
for rec in &mut rr[start..end] {
bam_fields::set_mate_ref_id(rec, r2_ref_id);
bam_fields::set_mate_pos(rec, r2_pos);
set_mate_flags(rec, r2_is_reverse, r2_is_unmapped);
bam_fields::set_template_length(rec, -r2_tlen);
let mq_val = if r2_mapq == 255 { 255 } else { i32::from(r2_mapq) };
bam_fields::update_int_tag(rec.as_mut_vec(), b"MQ", mq_val);
if !r2_cigar_str.is_empty() && r2_cigar_str != "*" && !r2_is_unmapped {
bam_fields::update_string_tag(
rec.as_mut_vec(),
b"MC",
r2_cigar_str.as_bytes(),
);
} else {
bam_fields::remove_tag(rec.as_mut_vec(), b"MC");
}
if let Some(as_value) = r2_as {
if let Ok(v) = i32::try_from(as_value) {
bam_fields::remove_tag(rec.as_mut_vec(), b"ms");
bam_fields::append_signed_int_tag(rec.as_mut_vec(), b"ms", v);
}
}
}
}
}
if let Some((r1_i, _)) = self.r1 {
let rr = &self.records;
let r1_ref_id = bam_fields::ref_id(&rr[r1_i]);
let r1_pos = bam_fields::pos(&rr[r1_i]);
let r1_flags = RawRecordView::new(&rr[r1_i]).flags();
let r1_is_reverse = (r1_flags & bam_fields::flags::REVERSE) != 0;
let r1_is_unmapped = (r1_flags & bam_fields::flags::UNMAPPED) != 0;
let r1_tlen = bam_fields::template_length(&rr[r1_i]);
let r1_mapq = bam_fields::mapq(&rr[r1_i]);
let r1_cigar_str = bam_fields::cigar_to_string_from_raw(&rr[r1_i]);
let r1_as = bam_fields::find_int_tag(bam_fields::aux_data_slice(&rr[r1_i]), b"AS");
if let Some((start, end)) = self.r2_supplementals {
let rr = &mut self.records;
for rec in &mut rr[start..end] {
bam_fields::set_mate_ref_id(rec, r1_ref_id);
bam_fields::set_mate_pos(rec, r1_pos);
set_mate_flags(rec, r1_is_reverse, r1_is_unmapped);
bam_fields::set_template_length(rec, -r1_tlen);
let mq_val = if r1_mapq == 255 { 255 } else { i32::from(r1_mapq) };
bam_fields::update_int_tag(rec.as_mut_vec(), b"MQ", mq_val);
if !r1_cigar_str.is_empty() && r1_cigar_str != "*" && !r1_is_unmapped {
bam_fields::update_string_tag(
rec.as_mut_vec(),
b"MC",
r1_cigar_str.as_bytes(),
);
} else {
bam_fields::remove_tag(rec.as_mut_vec(), b"MC");
}
if let Some(as_value) = r1_as {
if let Ok(v) = i32::try_from(as_value) {
bam_fields::remove_tag(rec.as_mut_vec(), b"ms");
bam_fields::append_signed_int_tag(rec.as_mut_vec(), b"ms", v);
}
}
}
}
}
Ok(())
}
fn set_mate_info_both_mapped(&mut self, r1_i: usize, r2_i: usize) {
use crate::sort::bam_fields;
let rr = &self.records;
let r2_ref_id = bam_fields::ref_id(&rr[r2_i]);
let r2_pos = bam_fields::pos(&rr[r2_i]);
let r2_is_reverse =
(RawRecordView::new(&rr[r2_i]).flags() & bam_fields::flags::REVERSE) != 0;
let r2_mapq = bam_fields::mapq(&rr[r2_i]);
let r2_cigar_str = bam_fields::cigar_to_string_from_raw(&rr[r2_i]);
let r1_ref_id = bam_fields::ref_id(&rr[r1_i]);
let r1_pos = bam_fields::pos(&rr[r1_i]);
let r1_is_reverse =
(RawRecordView::new(&rr[r1_i]).flags() & bam_fields::flags::REVERSE) != 0;
let r1_mapq = bam_fields::mapq(&rr[r1_i]);
let r1_cigar_str = bam_fields::cigar_to_string_from_raw(&rr[r1_i]);
let insert_size = compute_insert_size_raw(&rr[r1_i], &rr[r2_i]);
let rr = &mut self.records;
bam_fields::set_mate_ref_id(&mut rr[r1_i], r2_ref_id);
bam_fields::set_mate_pos(&mut rr[r1_i], r2_pos);
set_mate_flags(&mut rr[r1_i], r2_is_reverse, false);
let r2_mq_val = if r2_mapq == 255 { 255 } else { i32::from(r2_mapq) };
bam_fields::update_int_tag(rr[r1_i].as_mut_vec(), b"MQ", r2_mq_val);
if !r2_cigar_str.is_empty() && r2_cigar_str != "*" {
bam_fields::update_string_tag(rr[r1_i].as_mut_vec(), b"MC", r2_cigar_str.as_bytes());
} else {
bam_fields::remove_tag(rr[r1_i].as_mut_vec(), b"MC");
}
bam_fields::set_mate_ref_id(&mut rr[r2_i], r1_ref_id);
bam_fields::set_mate_pos(&mut rr[r2_i], r1_pos);
set_mate_flags(&mut rr[r2_i], r1_is_reverse, false);
let r1_mq_val = if r1_mapq == 255 { 255 } else { i32::from(r1_mapq) };
bam_fields::update_int_tag(rr[r2_i].as_mut_vec(), b"MQ", r1_mq_val);
if !r1_cigar_str.is_empty() && r1_cigar_str != "*" {
bam_fields::update_string_tag(rr[r2_i].as_mut_vec(), b"MC", r1_cigar_str.as_bytes());
} else {
bam_fields::remove_tag(rr[r2_i].as_mut_vec(), b"MC");
}
bam_fields::set_template_length(&mut rr[r1_i], insert_size);
bam_fields::set_template_length(&mut rr[r2_i], -insert_size);
}
fn set_mate_info_both_unmapped(&mut self, r1_i: usize, r2_i: usize) {
use crate::sort::bam_fields;
let rr = &self.records;
let r1_is_reverse =
(RawRecordView::new(&rr[r1_i]).flags() & bam_fields::flags::REVERSE) != 0;
let r2_is_reverse =
(RawRecordView::new(&rr[r2_i]).flags() & bam_fields::flags::REVERSE) != 0;
let rr = &mut self.records;
bam_fields::set_ref_id(&mut rr[r1_i], -1);
bam_fields::set_pos(&mut rr[r1_i], -1);
bam_fields::set_mate_ref_id(&mut rr[r1_i], -1);
bam_fields::set_mate_pos(&mut rr[r1_i], -1);
set_mate_flags(&mut rr[r1_i], r2_is_reverse, true);
bam_fields::remove_tag(rr[r1_i].as_mut_vec(), b"MQ");
bam_fields::remove_tag(rr[r1_i].as_mut_vec(), b"MC");
bam_fields::set_template_length(&mut rr[r1_i], 0);
bam_fields::set_ref_id(&mut rr[r2_i], -1);
bam_fields::set_pos(&mut rr[r2_i], -1);
bam_fields::set_mate_ref_id(&mut rr[r2_i], -1);
bam_fields::set_mate_pos(&mut rr[r2_i], -1);
set_mate_flags(&mut rr[r2_i], r1_is_reverse, true);
bam_fields::remove_tag(rr[r2_i].as_mut_vec(), b"MQ");
bam_fields::remove_tag(rr[r2_i].as_mut_vec(), b"MC");
bam_fields::set_template_length(&mut rr[r2_i], 0);
}
fn set_mate_info_one_unmapped(&mut self, r1_i: usize, r2_i: usize, r1_is_unmapped: bool) {
use crate::sort::bam_fields;
let (mapped_i, unmapped_i) = if r1_is_unmapped { (r2_i, r1_i) } else { (r1_i, r2_i) };
let rr = &self.records;
let mapped_ref_id = bam_fields::ref_id(&rr[mapped_i]);
let mapped_pos = bam_fields::pos(&rr[mapped_i]);
let mapped_flags = RawRecordView::new(&rr[mapped_i]).flags();
let mapped_is_reverse = (mapped_flags & bam_fields::flags::REVERSE) != 0;
let mapped_mapq = bam_fields::mapq(&rr[mapped_i]);
let mapped_cigar_str = bam_fields::cigar_to_string_from_raw(&rr[mapped_i]);
let unmapped_is_reverse =
(RawRecordView::new(&rr[unmapped_i]).flags() & bam_fields::flags::REVERSE) != 0;
let rr = &mut self.records;
bam_fields::set_ref_id(&mut rr[unmapped_i], mapped_ref_id);
bam_fields::set_pos(&mut rr[unmapped_i], mapped_pos);
bam_fields::set_mate_ref_id(&mut rr[mapped_i], mapped_ref_id);
bam_fields::set_mate_pos(&mut rr[mapped_i], mapped_pos);
set_mate_flags(&mut rr[mapped_i], unmapped_is_reverse, true);
bam_fields::remove_tag(rr[mapped_i].as_mut_vec(), b"MQ");
bam_fields::remove_tag(rr[mapped_i].as_mut_vec(), b"MC");
bam_fields::set_template_length(&mut rr[mapped_i], 0);
bam_fields::set_mate_ref_id(&mut rr[unmapped_i], mapped_ref_id);
bam_fields::set_mate_pos(&mut rr[unmapped_i], mapped_pos);
set_mate_flags(&mut rr[unmapped_i], mapped_is_reverse, false);
let mq_val = if mapped_mapq == 255 { 255 } else { i32::from(mapped_mapq) };
bam_fields::update_int_tag(rr[unmapped_i].as_mut_vec(), b"MQ", mq_val);
if !mapped_cigar_str.is_empty() && mapped_cigar_str != "*" {
bam_fields::update_string_tag(
rr[unmapped_i].as_mut_vec(),
b"MC",
mapped_cigar_str.as_bytes(),
);
} else {
bam_fields::remove_tag(rr[unmapped_i].as_mut_vec(), b"MC");
}
bam_fields::set_template_length(&mut rr[unmapped_i], 0);
}
}
fn set_mate_flags(record: &mut [u8], mate_is_reverse: bool, mate_is_unmapped: bool) {
use crate::sort::bam_fields;
let mut f = RawRecordView::new(record).flags();
f &= !bam_fields::flags::MATE_REVERSE;
if mate_is_reverse {
f |= bam_fields::flags::MATE_REVERSE;
}
f &= !bam_fields::flags::MATE_UNMAPPED;
if mate_is_unmapped {
f |= bam_fields::flags::MATE_UNMAPPED;
}
bam_fields::set_flags(record, f);
}
fn compute_insert_size_raw(rec1: &[u8], rec2: &[u8]) -> i32 {
use crate::sort::bam_fields;
let f1 = RawRecordView::new(rec1).flags();
let f2 = RawRecordView::new(rec2).flags();
if (f1 & bam_fields::flags::UNMAPPED) != 0 || (f2 & bam_fields::flags::UNMAPPED) != 0 {
return 0;
}
if bam_fields::ref_id(rec1) != bam_fields::ref_id(rec2) {
return 0;
}
let pos1 = bam_fields::pos(rec1) + 1;
let pos2 = bam_fields::pos(rec2) + 1;
let ref_len1 = bam_fields::reference_length_from_raw_bam(rec1);
let ref_len2 = bam_fields::reference_length_from_raw_bam(rec2);
let end1 = pos1 + ref_len1 - 1;
let end2 = pos2 + ref_len2 - 1;
let first_5prime = if (f1 & bam_fields::flags::REVERSE) != 0 { end1 } else { pos1 };
let second_5prime = if (f2 & bam_fields::flags::REVERSE) != 0 { end2 } else { pos2 };
let adjustment = if second_5prime >= first_5prime { 1 } else { -1 };
second_5prime - first_5prime + adjustment
}
#[allow(clippy::cast_possible_wrap)]
fn get_pair_orientation_raw(record: &[u8]) -> PairOrientation {
use crate::sort::bam_fields;
let f = RawRecordView::new(record).flags();
let is_reverse = (f & bam_fields::flags::REVERSE) != 0;
let mate_reverse = (f & bam_fields::flags::MATE_REVERSE) != 0;
if is_reverse == mate_reverse {
return PairOrientation::Tandem;
}
let alignment_start = bam_fields::pos(record) + 1; let mate_start = bam_fields::mate_pos(record) + 1;
let insert_size = bam_fields::template_length(record);
let (positive_five_prime, negative_five_prime) = if is_reverse {
let ref_len = bam_fields::reference_length_from_raw_bam(record);
let end = alignment_start + ref_len - 1;
(i64::from(mate_start), i64::from(end))
} else {
(i64::from(alignment_start), i64::from(alignment_start) + i64::from(insert_size))
};
if positive_five_prime < negative_five_prime {
PairOrientation::FR
} else {
PairOrientation::RF
}
}
pub struct TemplateIterator<R>
where
R: std::io::Read,
{
reader: fgumi_raw_bam::RawBamReader<R>,
pending: Option<RawRecord>,
exhausted: bool,
}
impl<R: std::io::Read> TemplateIterator<R> {
pub fn new(reader: fgumi_raw_bam::RawBamReader<R>) -> Self {
TemplateIterator { reader, pending: None, exhausted: false }
}
}
impl<R: std::io::Read> Iterator for TemplateIterator<R> {
type Item = Result<Template>;
fn next(&mut self) -> Option<Self::Item> {
if self.exhausted && self.pending.is_none() {
return None;
}
let mut batch: Vec<RawRecord> = Vec::with_capacity(2);
if let Some(peeked) = self.pending.take() {
batch.push(peeked);
}
loop {
if self.exhausted {
break;
}
let mut rec = RawRecord::new();
match self.reader.read_record(&mut rec) {
Ok(0) => {
self.exhausted = true;
break;
}
Ok(_) => {
let name = fgumi_raw_bam::read_name(&rec).to_vec();
if batch.is_empty() {
batch.push(rec);
} else {
let first_name = fgumi_raw_bam::read_name(&batch[0]).to_vec();
if name == first_name {
batch.push(rec);
} else {
self.pending = Some(rec);
break;
}
}
}
Err(e) => return Some(Err(anyhow::Error::from(e))),
}
}
if batch.is_empty() {
return None;
}
Some(Template::from_records(batch))
}
}
impl MemoryEstimate for Template {
fn estimate_heap_size(&self) -> usize {
let name_size = self.name.capacity();
let records_size = self.records.iter().map(RawRecord::capacity).sum::<usize>()
+ self.records.capacity() * std::mem::size_of::<RawRecord>();
name_size + records_size
}
}
impl MemoryEstimate for TemplateBatch {
fn estimate_heap_size(&self) -> usize {
self.iter().map(MemoryEstimate::estimate_heap_size).sum::<usize>()
+ self.capacity() * std::mem::size_of::<Template>()
}
}
#[cfg(test)]
#[allow(clippy::similar_names)]
mod tests {
use super::*;
use fgumi_raw_bam::{RawRecord, SamBuilder as RawSamBuilder, flags as raw_flags};
const FLAG_PAIRED: u16 = 0x1;
const FLAG_READ1: u16 = 0x40;
const FLAG_READ2: u16 = 0x80;
const FLAG_SECONDARY: u16 = 0x100;
const FLAG_SUPPLEMENTARY: u16 = 0x800;
const FLAG_REVERSE: u16 = 0x10;
const FLAG_MATE_REVERSE: u16 = 0x20;
const FLAG_UNMAPPED: u16 = 0x4;
const FLAG_MATE_UNMAPPED: u16 = 0x8;
fn create_test_raw(name: &[u8], flags: u16) -> RawRecord {
let mut b = RawSamBuilder::new();
b.read_name(name).sequence(b"ACGT").qualities(&[30; 4]).flags(flags);
b.build()
}
fn create_mapped_raw(name: &[u8], flags: u16, pos: usize, mapq: u8) -> RawRecord {
create_mapped_raw_with_tlen(name, flags, pos, mapq, 0)
}
fn create_mapped_raw_with_tlen(
name: &[u8],
flags: u16,
pos: usize,
mapq: u8,
tlen: i32,
) -> RawRecord {
use fgumi_raw_bam::testutil::encode_op;
let mut b = RawSamBuilder::new();
b.read_name(name)
.sequence(&[b'A'; 100])
.qualities(&[30u8; 100])
.flags(flags)
.ref_id(0)
.pos(i32::try_from(pos).expect("pos fits i32") - 1)
.mapq(mapq)
.template_length(tlen)
.cigar_ops(&[encode_op(0, 100)]);
b.build()
}
fn create_mapped_raw_with_flags(
name: &[u8],
flags: u16,
pos: usize,
mapq: u8,
tlen: i32,
mate_ref_id: Option<usize>,
mate_pos: Option<usize>,
) -> RawRecord {
use fgumi_raw_bam::testutil::encode_op;
let mut b = RawSamBuilder::new();
b.read_name(name)
.sequence(&[b'A'; 100])
.qualities(&[30u8; 100])
.flags(flags)
.ref_id(0)
.pos(i32::try_from(pos).expect("pos fits i32") - 1)
.mapq(mapq)
.template_length(tlen)
.cigar_ops(&[encode_op(0, 100)]);
if let Some(mref) = mate_ref_id {
b.mate_ref_id(i32::try_from(mref).expect("mate_ref_id fits i32"));
}
if let Some(mpos) = mate_pos {
b.mate_pos(i32::try_from(mpos).expect("mate_pos fits i32") - 1);
}
b.build()
}
#[test]
fn test_template_new() {
let template = Template::new(b"read1".to_vec());
assert_eq!(template.name, b"read1");
assert_eq!(template.read_count(), 0);
}
#[test]
fn test_from_records_single_r1() -> Result<()> {
let r1 = create_test_raw(b"read1", 0);
let template = Template::from_records(vec![r1])?;
assert_eq!(template.name, b"read1");
assert_eq!(template.read_count(), 1);
assert!(template.r1().is_some());
Ok(())
}
#[test]
fn test_from_records_paired() -> Result<()> {
let r1 = create_test_raw(b"read1", FLAG_PAIRED | FLAG_READ1);
let r2 = create_test_raw(b"read1", FLAG_PAIRED | FLAG_READ2);
let template = Template::from_records(vec![r1, r2])?;
assert_eq!(template.read_count(), 2);
Ok(())
}
#[test]
fn test_from_records_supplementary() -> Result<()> {
let r1 = create_test_raw(b"read1", FLAG_PAIRED | FLAG_READ1);
let supp = create_test_raw(b"read1", FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY);
let template = Template::from_records(vec![r1, supp])?;
assert_eq!(template.read_count(), 2);
assert!(template.r1().is_some());
Ok(())
}
#[test]
fn test_from_records_error_name_mismatch() {
let r1 = create_test_raw(b"read1", FLAG_PAIRED | FLAG_READ1);
let supp = create_test_raw(b"read1", FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY);
let r2_wrong = create_test_raw(b"read2", FLAG_PAIRED | FLAG_READ2);
let result = Template::from_records(vec![r1, supp, r2_wrong]);
assert!(result.is_err());
}
#[test]
fn test_from_records_error_multiple_r1() {
let r1a = create_test_raw(b"read1", 0);
let r1b = create_test_raw(b"read1", 0);
let result = Template::from_records(vec![r1a, r1b]);
assert!(result.is_err());
assert!(result.unwrap_err().to_string().contains("Multiple non-secondary"));
}
#[test]
fn test_from_records_error_multiple_r2() {
let r2a = create_test_raw(b"read1", FLAG_PAIRED | FLAG_READ2);
let r2b = create_test_raw(b"read1", FLAG_PAIRED | FLAG_READ2);
let result = Template::from_records(vec![r2a, r2b]);
assert!(result.is_err());
assert!(result.unwrap_err().to_string().contains("Multiple non-secondary"));
}
#[test]
fn test_from_records_error_empty() {
let result = Template::from_records(vec![]);
assert!(result.is_err());
}
#[test]
fn test_template_from_records_basic() -> Result<()> {
let r1 = create_test_raw(b"read1", FLAG_PAIRED | FLAG_READ1);
let r2 = create_test_raw(b"read1", FLAG_PAIRED | FLAG_READ2);
let template = Template::from_records(vec![r1, r2])?;
assert_eq!(template.read_count(), 2);
assert!(template.r1().is_some());
assert!(template.r2().is_some());
Ok(())
}
#[test]
fn test_primary_reads_iter() -> Result<()> {
let r1 = create_test_raw(b"read1", FLAG_PAIRED | FLAG_READ1);
let r2 = create_test_raw(b"read1", FLAG_PAIRED | FLAG_READ2);
let template = Template::from_records(vec![r1, r2])?;
let primaries: Vec<_> = template.primary_reads().collect();
assert_eq!(primaries.len(), 2);
Ok(())
}
#[test]
fn test_into_records_consumes() -> Result<()> {
let r1 = create_test_raw(b"read1", FLAG_PAIRED | FLAG_READ1);
let r2 = create_test_raw(b"read1", FLAG_PAIRED | FLAG_READ2);
let template = Template::from_records(vec![r1, r2])?;
assert!(template.r1().is_some());
assert!(template.r2().is_some());
let recs = template.into_records();
assert_eq!(recs.len(), 2);
Ok(())
}
#[test]
fn test_read_count_with_supplementals() -> Result<()> {
let r1 = create_test_raw(b"read1", 0);
let supp = create_test_raw(b"read1", FLAG_SUPPLEMENTARY);
let sec = create_test_raw(b"read1", FLAG_SECONDARY);
let template = Template::from_records(vec![r1, supp, sec])?;
assert_eq!(template.read_count(), 3);
Ok(())
}
#[test]
fn test_r1_supplementals_range() -> Result<()> {
let r1 = create_test_raw(b"read1", FLAG_PAIRED | FLAG_READ1);
let supp1 = create_test_raw(b"read1", FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY);
let supp2 = create_test_raw(b"read1", FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY);
let template = Template::from_records(vec![r1, supp1, supp2])?;
assert!(template.r1_supplementals.is_some());
let (start, end) = template.r1_supplementals.unwrap();
assert_eq!(end - start, 2);
Ok(())
}
#[test]
fn test_r2_supplementals_range() -> Result<()> {
let r1 = create_test_raw(b"read1", FLAG_PAIRED | FLAG_READ1);
let r2 = create_test_raw(b"read1", FLAG_PAIRED | FLAG_READ2);
let supp = create_test_raw(b"read1", FLAG_PAIRED | FLAG_READ2 | FLAG_SUPPLEMENTARY);
let template = Template::from_records(vec![r1, r2, supp])?;
assert!(template.r2_supplementals.is_some());
let (start, end) = template.r2_supplementals.unwrap();
assert_eq!(end - start, 1);
Ok(())
}
#[test]
fn test_r1_secondaries_range() -> Result<()> {
let r1 = create_test_raw(b"read1", 0);
let sec = create_test_raw(b"read1", FLAG_SECONDARY);
let template = Template::from_records(vec![r1, sec])?;
assert!(template.r1_secondaries.is_some());
let (start, end) = template.r1_secondaries.unwrap();
assert_eq!(end - start, 1);
Ok(())
}
#[test]
fn test_r2_secondaries_range() -> Result<()> {
let r2a = create_test_raw(b"read1", FLAG_PAIRED | FLAG_READ2);
let r2b = create_test_raw(b"read1", FLAG_PAIRED | FLAG_READ2 | FLAG_SECONDARY);
let r2c = create_test_raw(b"read1", FLAG_PAIRED | FLAG_READ2 | FLAG_SECONDARY);
let template = Template::from_records(vec![r2a, r2b, r2c])?;
assert!(template.r2_secondaries.is_some());
let (start, end) = template.r2_secondaries.unwrap();
assert_eq!(end - start, 2);
Ok(())
}
fn create_mapped_record(name: &[u8], flags: u16, pos: usize, mapq: u8) -> RawRecord {
create_mapped_raw(name, flags, pos, mapq)
}
fn create_mapped_record_with_tlen(
name: &[u8],
flags: u16,
pos: usize,
mapq: u8,
tlen: i32,
) -> RawRecord {
create_mapped_raw_with_tlen(name, flags, pos, mapq, tlen)
}
#[test]
fn test_fix_mate_info_sets_ms_tag_with_int8_as() -> Result<()> {
let mut b = RawSamBuilder::new();
b.read_name(b"read1")
.sequence(&[b'A'; 100])
.qualities(&[30u8; 100])
.flags(FLAG_PAIRED | FLAG_READ1)
.ref_id(0)
.pos(99)
.mapq(30)
.cigar_ops(&[fgumi_raw_bam::testutil::encode_op(0, 100)])
.add_int_tag(b"AS", 55);
let r1 = b.build();
let mut b = RawSamBuilder::new();
b.read_name(b"read1")
.sequence(&[b'A'; 100])
.qualities(&[30u8; 100])
.flags(FLAG_PAIRED | FLAG_READ2)
.ref_id(0)
.pos(199)
.mapq(40)
.cigar_ops(&[fgumi_raw_bam::testutil::encode_op(0, 100)])
.add_int_tag(b"AS", 44);
let r2 = b.build();
let mut template = Template::from_records(vec![r1, r2])?;
template.fix_mate_info()?;
assert_eq!(template.records()[0].tags().find_int(b"ms"), Some(44), "R1 ms should be 44");
assert_eq!(template.records()[1].tags().find_int(b"ms"), Some(55), "R2 ms should be 55");
Ok(())
}
#[test]
fn test_fix_mate_info_sets_ms_tag_with_uint8_as() -> Result<()> {
let mut b = RawSamBuilder::new();
b.read_name(b"read1")
.sequence(&[b'A'; 100])
.qualities(&[30u8; 100])
.flags(FLAG_PAIRED | FLAG_READ1)
.ref_id(0)
.pos(99)
.mapq(30)
.cigar_ops(&[fgumi_raw_bam::testutil::encode_op(0, 100)])
.add_int_tag(b"AS", 77);
let r1 = b.build();
let mut b = RawSamBuilder::new();
b.read_name(b"read1")
.sequence(&[b'A'; 100])
.qualities(&[30u8; 100])
.flags(FLAG_PAIRED | FLAG_READ2)
.ref_id(0)
.pos(199)
.mapq(40)
.cigar_ops(&[fgumi_raw_bam::testutil::encode_op(0, 100)])
.add_int_tag(b"AS", 88);
let r2 = b.build();
let mut template = Template::from_records(vec![r1, r2])?;
template.fix_mate_info()?;
assert_eq!(template.records()[0].tags().find_int(b"ms"), Some(88), "R1 ms should be 88");
assert_eq!(template.records()[1].tags().find_int(b"ms"), Some(77), "R2 ms should be 77");
Ok(())
}
#[test]
fn test_fix_mate_info_sets_ms_tag_with_int16_as() -> Result<()> {
let mut b = RawSamBuilder::new();
b.read_name(b"read1")
.sequence(&[b'A'; 100])
.qualities(&[30u8; 100])
.flags(FLAG_PAIRED | FLAG_READ1)
.ref_id(0)
.pos(99)
.mapq(30)
.cigar_ops(&[fgumi_raw_bam::testutil::encode_op(0, 100)])
.add_int_tag(b"AS", 1000);
let r1 = b.build();
let mut b = RawSamBuilder::new();
b.read_name(b"read1")
.sequence(&[b'A'; 100])
.qualities(&[30u8; 100])
.flags(FLAG_PAIRED | FLAG_READ2)
.ref_id(0)
.pos(199)
.mapq(40)
.cigar_ops(&[fgumi_raw_bam::testutil::encode_op(0, 100)])
.add_int_tag(b"AS", 2000);
let r2 = b.build();
let mut template = Template::from_records(vec![r1, r2])?;
template.fix_mate_info()?;
assert_eq!(
template.records()[0].tags().find_int(b"ms"),
Some(2000),
"R1 ms should be 2000"
);
assert_eq!(
template.records()[1].tags().find_int(b"ms"),
Some(1000),
"R2 ms should be 1000"
);
Ok(())
}
#[test]
fn test_fix_mate_info_sets_ms_tag_with_int32_as() -> Result<()> {
let mut b = RawSamBuilder::new();
b.read_name(b"read1")
.sequence(&[b'A'; 100])
.qualities(&[30u8; 100])
.flags(FLAG_PAIRED | FLAG_READ1)
.ref_id(0)
.pos(99)
.mapq(30)
.cigar_ops(&[fgumi_raw_bam::testutil::encode_op(0, 100)])
.add_int_tag(b"AS", 100_000);
let r1 = b.build();
let mut b = RawSamBuilder::new();
b.read_name(b"read1")
.sequence(&[b'A'; 100])
.qualities(&[30u8; 100])
.flags(FLAG_PAIRED | FLAG_READ2)
.ref_id(0)
.pos(199)
.mapq(40)
.cigar_ops(&[fgumi_raw_bam::testutil::encode_op(0, 100)])
.add_int_tag(b"AS", 200_000);
let r2 = b.build();
let mut template = Template::from_records(vec![r1, r2])?;
template.fix_mate_info()?;
assert_eq!(
template.records()[0].tags().find_int(b"ms"),
Some(200_000),
"R1 ms should be 200_000"
);
assert_eq!(
template.records()[1].tags().find_int(b"ms"),
Some(100_000),
"R2 ms should be 100_000"
);
Ok(())
}
#[test]
fn test_fix_mate_info_no_ms_tag_without_as() -> Result<()> {
let r1 = create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ1, 100, 30);
let r2 = create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ2, 200, 40);
let mut template = Template::from_records(vec![r1, r2])?;
template.fix_mate_info()?;
assert!(
template.records()[0].tags().find_int(b"ms").is_none(),
"R1 should not have ms tag"
);
assert!(
template.records()[1].tags().find_int(b"ms").is_none(),
"R2 should not have ms tag"
);
Ok(())
}
#[test]
fn test_fix_mate_info_sets_ms_tag_on_supplementals() -> Result<()> {
let mut b = RawSamBuilder::new();
b.read_name(b"read1")
.sequence(&[b'A'; 100])
.qualities(&[30u8; 100])
.flags(FLAG_PAIRED | FLAG_READ1)
.ref_id(0)
.pos(99)
.mapq(30)
.cigar_ops(&[fgumi_raw_bam::testutil::encode_op(0, 100)])
.add_int_tag(b"AS", 55);
let r1 = b.build();
let mut b = RawSamBuilder::new();
b.read_name(b"read1")
.sequence(&[b'A'; 100])
.qualities(&[30u8; 100])
.flags(FLAG_PAIRED | FLAG_READ2)
.ref_id(0)
.pos(199)
.mapq(40)
.cigar_ops(&[fgumi_raw_bam::testutil::encode_op(0, 100)])
.add_int_tag(b"AS", 44);
let r2 = b.build();
let r1_supp =
create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY, 300, 20);
let mut template = Template::from_records(vec![r1, r2, r1_supp])?;
template.fix_mate_info()?;
assert_eq!(
template.records()[2].tags().find_int(b"ms"),
Some(44),
"R1 supplementary ms should be R2's AS value (44)"
);
Ok(())
}
#[test]
fn test_fix_mate_info_sets_tlen_on_r1_supplementals() -> Result<()> {
let r1 = create_mapped_record_with_tlen(b"read1", FLAG_PAIRED | FLAG_READ1, 100, 30, 200);
let r2 = create_mapped_record_with_tlen(b"read1", FLAG_PAIRED | FLAG_READ2, 200, 40, -200);
let r1_supp = create_mapped_record_with_tlen(
b"read1",
FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY,
300,
20,
0,
);
let mut template = Template::from_records(vec![r1, r2, r1_supp])?;
template.fix_mate_info()?;
assert_eq!(
template.records()[2].template_length(),
101,
"R1 supplementary TLEN should be negative of R2's TLEN"
);
Ok(())
}
#[test]
fn test_fix_mate_info_sets_tlen_on_r2_supplementals() -> Result<()> {
let r1 = create_mapped_record_with_tlen(b"read1", FLAG_PAIRED | FLAG_READ1, 100, 30, 300);
let r2 = create_mapped_record_with_tlen(b"read1", FLAG_PAIRED | FLAG_READ2, 200, 40, -300);
let r2_supp = create_mapped_record_with_tlen(
b"read1",
FLAG_PAIRED | FLAG_READ2 | FLAG_SUPPLEMENTARY,
400,
25,
0,
);
let mut template = Template::from_records(vec![r1, r2, r2_supp])?;
template.fix_mate_info()?;
assert_eq!(
template.records()[2].template_length(),
-101,
"R2 supplementary TLEN should be negative of R1's TLEN"
);
Ok(())
}
#[test]
fn test_fix_mate_info_sets_tlen_on_multiple_supplementals() -> Result<()> {
let r1 = create_mapped_record_with_tlen(b"read1", FLAG_PAIRED | FLAG_READ1, 100, 30, 500);
let r2 = create_mapped_record_with_tlen(b"read1", FLAG_PAIRED | FLAG_READ2, 200, 40, -500);
let r1_supp1 = create_mapped_record_with_tlen(
b"read1",
FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY,
300,
20,
0,
);
let r1_supp2 = create_mapped_record_with_tlen(
b"read1",
FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY,
400,
15,
0,
);
let r2_supp1 = create_mapped_record_with_tlen(
b"read1",
FLAG_PAIRED | FLAG_READ2 | FLAG_SUPPLEMENTARY,
500,
25,
0,
);
let r2_supp2 = create_mapped_record_with_tlen(
b"read1",
FLAG_PAIRED | FLAG_READ2 | FLAG_SUPPLEMENTARY,
600,
10,
0,
);
let mut template =
Template::from_records(vec![r1, r2, r1_supp1, r1_supp2, r2_supp1, r2_supp2])?;
template.fix_mate_info()?;
assert_eq!(template.records()[2].template_length(), 101, "R1 supp1 TLEN");
assert_eq!(template.records()[3].template_length(), 101, "R1 supp2 TLEN");
assert_eq!(template.records()[4].template_length(), -101, "R2 supp1 TLEN");
assert_eq!(template.records()[5].template_length(), -101, "R2 supp2 TLEN");
Ok(())
}
#[test]
fn test_fix_mate_info_tlen_recalculated() -> Result<()> {
let r1 = create_mapped_record_with_tlen(b"read1", FLAG_PAIRED | FLAG_READ1, 100, 30, 0);
let r2 = create_mapped_record_with_tlen(b"read1", FLAG_PAIRED | FLAG_READ2, 200, 40, 0);
let r1_supp = create_mapped_record_with_tlen(
b"read1",
FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY,
300,
20,
999,
);
let mut template = Template::from_records(vec![r1, r2, r1_supp])?;
template.fix_mate_info()?;
assert_eq!(
template.records()[2].template_length(),
101,
"R1 supplementary TLEN should be recalculated from positions"
);
Ok(())
}
#[test]
fn test_fix_mate_info_no_mate_primary() -> Result<()> {
let r1 = create_mapped_record_with_tlen(b"read1", FLAG_PAIRED | FLAG_READ1, 100, 30, 200);
let r1_supp = create_mapped_record_with_tlen(
b"read1",
FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY,
300,
20,
777,
);
let mut template = Template::from_records(vec![r1, r1_supp])?;
template.fix_mate_info()?;
assert_eq!(
template.records()[1].template_length(),
777,
"R1 supplementary TLEN should be unchanged without R2 primary"
);
Ok(())
}
#[test]
fn test_template_record_ordering() -> Result<()> {
let r1 = create_test_raw(b"read1", FLAG_PAIRED | FLAG_READ1);
let r2 = create_test_raw(b"read1", FLAG_PAIRED | FLAG_READ2);
let r1_supp1 = create_test_raw(b"read1", FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY);
let r1_supp2 = create_test_raw(b"read1", FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY);
let r2_supp = create_test_raw(b"read1", FLAG_PAIRED | FLAG_READ2 | FLAG_SUPPLEMENTARY);
let r1_sec = create_test_raw(b"read1", FLAG_PAIRED | FLAG_READ1 | FLAG_SECONDARY);
let r2_sec1 = create_test_raw(b"read1", FLAG_PAIRED | FLAG_READ2 | FLAG_SECONDARY);
let r2_sec2 = create_test_raw(b"read1", FLAG_PAIRED | FLAG_READ2 | FLAG_SECONDARY);
let template = Template::from_records(vec![
r2_sec1, r1_supp2, r1, r2_supp, r1_sec, r2, r2_sec2, r1_supp1,
])?;
assert_eq!(template.r1, Some((0, 1)), "R1 should be at index 0");
assert_eq!(template.r2, Some((1, 2)), "R2 should be at index 1");
assert_eq!(template.r1_supplementals, Some((2, 4)), "R1 supps should be at indices 2-3");
assert_eq!(template.r2_supplementals, Some((4, 5)), "R2 supps should be at index 4");
assert_eq!(template.r1_secondaries, Some((5, 6)), "R1 secs should be at index 5");
assert_eq!(template.r2_secondaries, Some((6, 8)), "R2 secs should be at indices 6-7");
assert_eq!(template.read_count(), 8);
let recs = template.records();
assert!(recs[0].is_first_segment() && !recs[0].is_supplementary(), "R1 primary");
assert!(recs[1].is_last_segment() && !recs[1].is_supplementary(), "R2 primary");
assert!(recs[2].is_first_segment() && recs[2].is_supplementary(), "R1 supp[0]");
assert!(recs[3].is_first_segment() && recs[3].is_supplementary(), "R1 supp[1]");
assert!(recs[4].is_last_segment() && recs[4].is_supplementary(), "R2 supp");
assert!(recs[5].is_first_segment() && recs[5].is_secondary(), "R1 sec");
assert!(recs[6].is_last_segment() && recs[6].is_secondary(), "R2 sec[0]");
assert!(recs[7].is_last_segment() && recs[7].is_secondary(), "R2 sec[1]");
Ok(())
}
#[test]
fn test_record_ordering_within_groups_is_reversed() -> Result<()> {
let r1 = create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ1, 100, 30);
let r2 = create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ2, 200, 30);
let r1_supp1 =
create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY, 1000, 20);
let r1_supp2 =
create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY, 2000, 20);
let r1_supp3 =
create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY, 3000, 20);
let r2_supp1 =
create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ2 | FLAG_SUPPLEMENTARY, 4000, 20);
let r2_supp2 =
create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ2 | FLAG_SUPPLEMENTARY, 5000, 20);
let r1_sec1 =
create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ1 | FLAG_SECONDARY, 6000, 10);
let r1_sec2 =
create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ1 | FLAG_SECONDARY, 7000, 10);
let template = Template::from_records(vec![
r1, r2, r1_supp1, r1_supp2, r1_supp3, r2_supp1, r2_supp2, r1_sec1, r1_sec2,
])?;
let recs = template.records();
assert_eq!(recs[0].pos() + 1, 100, "R1 primary");
assert_eq!(recs[1].pos() + 1, 200, "R2 primary");
assert_eq!(recs[2].pos() + 1, 3000, "R1 supp in reverse order");
assert_eq!(recs[3].pos() + 1, 2000, "R1 supp in reverse order");
assert_eq!(recs[4].pos() + 1, 1000, "R1 supp in reverse order");
assert_eq!(recs[5].pos() + 1, 5000, "R2 supp in reverse order");
assert_eq!(recs[6].pos() + 1, 4000, "R2 supp in reverse order");
assert_eq!(recs[7].pos() + 1, 7000, "R1 sec in reverse order");
assert_eq!(recs[8].pos() + 1, 6000, "R1 sec in reverse order");
Ok(())
}
fn create_mapped_record_with_flags(
name: &[u8],
flags: u16,
pos: usize,
mapq: u8,
tlen: i32,
mate_ref_id: Option<usize>,
mate_pos: Option<usize>,
) -> RawRecord {
create_mapped_raw_with_flags(name, flags, pos, mapq, tlen, mate_ref_id, mate_pos)
}
#[test]
fn test_fix_mate_info_sets_full_mate_info_on_r1_supplementals() -> Result<()> {
let mut b = RawSamBuilder::new();
b.read_name(b"read1")
.sequence(&[b'A'; 100])
.qualities(&[30u8; 100])
.flags(FLAG_PAIRED | FLAG_READ1)
.ref_id(0)
.pos(99)
.mapq(30)
.cigar_ops(&[fgumi_raw_bam::testutil::encode_op(0, 100)])
.template_length(200)
.mate_ref_id(0)
.mate_pos(199)
.add_int_tag(b"AS", 100);
let r1 = b.build();
let mut b = RawSamBuilder::new();
b.read_name(b"read1")
.sequence(&[b'A'; 100])
.qualities(&[30u8; 100])
.flags(FLAG_PAIRED | FLAG_READ2 | FLAG_REVERSE)
.ref_id(0)
.pos(199)
.mapq(40)
.cigar_ops(&[fgumi_raw_bam::testutil::encode_op(0, 100)])
.template_length(-200)
.mate_ref_id(0)
.mate_pos(99)
.add_int_tag(b"AS", 150);
let r2 = b.build();
let r1_supp = create_mapped_record_with_flags(
b"read1",
FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY | FLAG_MATE_REVERSE,
500,
25,
0,
Some(0),
Some(500),
);
let mut template = Template::from_records(vec![r1, r2, r1_supp])?;
template.fix_mate_info()?;
let supp = &template.records()[2];
assert_eq!(supp.mate_pos() + 1, 200, "R1 supp should have mate pos from R2");
assert_eq!(supp.mate_ref_id(), 0, "R1 supp should have mate ref from R2");
assert!(supp.is_mate_reverse(), "R1 supp should have mate_reverse since R2 is reverse");
assert!(!supp.is_mate_unmapped(), "R1 supp should NOT have mate_unmapped");
assert_eq!(supp.template_length(), 200, "R1 supp TLEN should be -(-200) = 200");
assert_eq!(supp.tags().find_int(b"MQ"), Some(40), "R1 supp MQ should be R2's mapq");
assert!(supp.tags().find_string(b"MC").is_some(), "R1 supp should have MC tag");
assert_eq!(supp.tags().find_int(b"ms"), Some(150), "R1 supp ms should be R2's AS");
Ok(())
}
#[test]
fn test_fix_mate_info_sets_full_mate_info_on_r2_supplementals() -> Result<()> {
let mut b = RawSamBuilder::new();
b.read_name(b"read1")
.sequence(&[b'A'; 100])
.qualities(&[30u8; 100])
.flags(FLAG_PAIRED | FLAG_READ1)
.ref_id(0)
.pos(99)
.mapq(35)
.cigar_ops(&[fgumi_raw_bam::testutil::encode_op(0, 100)])
.template_length(300)
.mate_ref_id(0)
.mate_pos(199)
.add_int_tag(b"AS", 120);
let r1 = b.build();
let mut b = RawSamBuilder::new();
b.read_name(b"read1")
.sequence(&[b'A'; 100])
.qualities(&[30u8; 100])
.flags(FLAG_PAIRED | FLAG_READ2)
.ref_id(0)
.pos(199)
.mapq(45)
.cigar_ops(&[fgumi_raw_bam::testutil::encode_op(0, 100)])
.template_length(-300)
.mate_ref_id(0)
.mate_pos(99)
.add_int_tag(b"AS", 180);
let r2 = b.build();
let r2_supp = create_mapped_record_with_flags(
b"read1",
FLAG_PAIRED | FLAG_READ2 | FLAG_SUPPLEMENTARY,
600,
20,
0,
Some(0),
Some(600),
);
let mut template = Template::from_records(vec![r1, r2, r2_supp])?;
template.fix_mate_info()?;
let supp = &template.records()[2];
assert_eq!(supp.mate_pos() + 1, 100, "R2 supp should have mate pos from R1");
assert!(
!supp.is_mate_reverse(),
"R2 supp should NOT have mate_reverse since R1 is forward"
);
assert_eq!(supp.template_length(), -101, "R2 supp TLEN should be -101");
assert_eq!(supp.tags().find_int(b"MQ"), Some(35), "R2 supp MQ should be R1's mapq");
assert!(supp.tags().find_string(b"MC").is_some(), "R2 supp should have MC tag");
assert_eq!(supp.tags().find_int(b"ms"), Some(120), "R2 supp ms should be R1's AS");
Ok(())
}
#[test]
fn test_fix_mate_info_supplemental_with_unmapped_mate() -> Result<()> {
let r1 = create_mapped_record_with_flags(
b"read1",
FLAG_PAIRED | FLAG_READ1 | FLAG_MATE_UNMAPPED,
100,
30,
0,
Some(0),
Some(100),
);
let mut b = RawSamBuilder::new();
b.read_name(b"read1")
.sequence(&[b'A'; 100])
.qualities(&[30u8; 100])
.flags(FLAG_PAIRED | FLAG_READ2 | FLAG_UNMAPPED)
.ref_id(0)
.pos(99)
.mapq(0)
.template_length(0)
.mate_ref_id(0)
.mate_pos(99);
let r2 = b.build();
let r1_supp = create_mapped_record_with_flags(
b"read1",
FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY | FLAG_MATE_REVERSE,
500,
25,
999,
Some(0),
Some(500),
);
let mut template = Template::from_records(vec![r1, r2, r1_supp])?;
template.fix_mate_info()?;
let supp = &template.records()[2];
assert!(supp.is_mate_unmapped(), "R1 supp should have mate_unmapped since R2 is unmapped");
assert!(
!supp.is_mate_reverse(),
"R1 supp should NOT have mate_reverse when R2 is unmapped"
);
assert!(supp.tags().find_string(b"MC").is_none(), "R1 supp should NOT have MC tag");
assert_eq!(supp.template_length(), 0, "TLEN should be 0 when mate is unmapped");
Ok(())
}
#[test]
fn test_fix_mate_info_clears_incorrect_mate_reverse_flag() -> Result<()> {
let r1 = create_mapped_record_with_flags(
b"read1",
FLAG_PAIRED | FLAG_READ1,
100,
30,
200,
Some(0),
Some(200),
);
let r2 = create_mapped_record_with_flags(
b"read1",
FLAG_PAIRED | FLAG_READ2, 200,
40,
-200,
Some(0),
Some(100),
);
let r1_supp = create_mapped_record_with_flags(
b"read1",
FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY | FLAG_MATE_REVERSE,
500,
25,
0,
Some(0),
Some(500),
);
let mut template = Template::from_records(vec![r1, r2, r1_supp])?;
assert!(
template.records()[2].is_mate_reverse(),
"Before fix: supp incorrectly has mate_reverse"
);
template.fix_mate_info()?;
assert!(
!template.records()[2].is_mate_reverse(),
"After fix: supp should NOT have mate_reverse"
);
Ok(())
}
#[test]
fn test_fix_mate_info_multiple_supplementals() -> Result<()> {
let r1 = create_mapped_record_with_flags(
b"read1",
FLAG_PAIRED | FLAG_READ1,
100,
30,
400,
Some(0),
Some(300),
);
let mut b = RawSamBuilder::new();
b.read_name(b"read1")
.sequence(&[b'A'; 100])
.qualities(&[30u8; 100])
.flags(FLAG_PAIRED | FLAG_READ2 | FLAG_REVERSE)
.ref_id(0)
.pos(299)
.mapq(50)
.cigar_ops(&[fgumi_raw_bam::testutil::encode_op(0, 100)])
.template_length(-400)
.mate_ref_id(0)
.mate_pos(99)
.add_int_tag(b"AS", 200);
let r2 = b.build();
let r1_supp1 = create_mapped_record_with_flags(
b"read1",
FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY,
500,
20,
0,
Some(0),
Some(500),
);
let r1_supp2 = create_mapped_record_with_flags(
b"read1",
FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY,
700,
15,
0,
Some(0),
Some(700),
);
let mut template = Template::from_records(vec![r1, r2, r1_supp1, r1_supp2])?;
template.fix_mate_info()?;
for i in 2..=3 {
let supp = &template.records()[i];
assert_eq!(supp.mate_pos() + 1, 300, "Supp {i} should have mate pos 300");
assert!(supp.is_mate_reverse(), "Supp {i} should have mate_reverse");
assert_eq!(supp.template_length(), 300, "Supp {i} TLEN should be 300");
}
Ok(())
}
#[test]
fn test_fix_mate_info_sets_tlen_normal_innie() -> Result<()> {
let mut b = RawSamBuilder::new();
b.read_name(b"read1")
.sequence(&[b'A'; 50])
.qualities(&[30u8; 50])
.flags(raw_flags::PAIRED | raw_flags::FIRST_SEGMENT)
.ref_id(0)
.pos(0)
.mapq(30)
.cigar_ops(&[50u32 << 4]);
let r1 = b.build();
let mut b = RawSamBuilder::new();
b.read_name(b"read1")
.sequence(&[b'A'; 100])
.qualities(&[30u8; 100])
.flags(raw_flags::PAIRED | raw_flags::LAST_SEGMENT | raw_flags::REVERSE)
.ref_id(0)
.pos(499)
.mapq(30)
.cigar_ops(&[100u32 << 4]);
let r2 = b.build();
let mut template = Template::from_records(vec![r1, r2])?;
template.fix_mate_info()?;
assert_eq!(template.records()[0].template_length(), 599, "R1 TLEN");
assert_eq!(template.records()[1].template_length(), -599, "R2 TLEN");
Ok(())
}
#[test]
fn test_fix_mate_info_sets_mate_cigar() -> Result<()> {
let mut b = RawSamBuilder::new();
b.read_name(b"read1")
.sequence(&[b'A'; 50])
.qualities(&[30u8; 50])
.flags(raw_flags::PAIRED | raw_flags::FIRST_SEGMENT)
.ref_id(0)
.pos(0)
.mapq(30)
.cigar_ops(&[50u32 << 4]); let r1 = b.build();
let cigar = vec![25u32 << 4, (5u32 << 4) | 1, 20u32 << 4];
let mut b = RawSamBuilder::new();
b.read_name(b"read1")
.sequence(&[b'A'; 50])
.qualities(&[30u8; 50])
.flags(raw_flags::PAIRED | raw_flags::LAST_SEGMENT | raw_flags::REVERSE)
.ref_id(0)
.pos(499)
.mapq(30)
.cigar_ops(&cigar);
let r2 = b.build();
let mut template = Template::from_records(vec![r1, r2])?;
template.fix_mate_info()?;
let r1_mc = template.records()[0].tags().find_string(b"MC");
assert!(r1_mc.is_some(), "R1 should have MC tag");
assert_eq!(r1_mc.unwrap(), b"25M5I20M", "R1 MC should be R2's CIGAR");
let r2_mc = template.records()[1].tags().find_string(b"MC");
assert!(r2_mc.is_some(), "R2 should have MC tag");
assert_eq!(r2_mc.unwrap(), b"50M", "R2 MC should be R1's CIGAR");
Ok(())
}
#[test]
fn test_fix_mate_info_both_unmapped() -> Result<()> {
let mut b = RawSamBuilder::new();
b.read_name(b"read1")
.sequence(b"ACGT")
.qualities(&[30u8; 4])
.flags(FLAG_PAIRED | FLAG_READ1 | FLAG_UNMAPPED)
.ref_id(0)
.pos(0)
.mapq(0)
.add_int_tag(b"MQ", 30);
let r1 = b.build();
let mut b = RawSamBuilder::new();
b.read_name(b"read1")
.sequence(b"ACGT")
.qualities(&[30u8; 4])
.flags(FLAG_PAIRED | FLAG_READ2 | FLAG_UNMAPPED)
.ref_id(0)
.pos(0)
.mapq(0)
.add_string_tag(b"MC", b"100M");
let r2 = b.build();
let mut template = Template::from_records(vec![r1, r2])?;
template.fix_mate_info()?;
assert_eq!(template.records()[0].ref_id(), -1, "R1 ref should be cleared");
assert_eq!(template.records()[1].ref_id(), -1, "R2 ref should be cleared");
assert!(template.records()[0].is_mate_unmapped(), "R1 mate_unmapped");
assert!(template.records()[1].is_mate_unmapped(), "R2 mate_unmapped");
assert!(template.records()[0].tags().find_int(b"MQ").is_none(), "R1 MQ should be removed");
assert!(
template.records()[1].tags().find_string(b"MC").is_none(),
"R2 MC should be removed"
);
assert_eq!(template.records()[0].template_length(), 0, "R1 TLEN should be 0");
assert_eq!(template.records()[1].template_length(), 0, "R2 TLEN should be 0");
Ok(())
}
#[test]
fn test_fix_mate_info_one_unmapped() -> Result<()> {
let r1 = create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ1, 100, 30);
let mut b = RawSamBuilder::new();
b.read_name(b"read1")
.sequence(b"ACGT")
.qualities(&[30u8; 4])
.flags(FLAG_PAIRED | FLAG_READ2 | FLAG_UNMAPPED)
.mapq(0);
let r2 = b.build();
let mut template = Template::from_records(vec![r1, r2])?;
template.fix_mate_info()?;
assert_eq!(template.records()[1].ref_id(), 0, "R2 should be placed at R1's reference");
assert_eq!(template.records()[1].pos() + 1, 100, "R2 should be placed at R1's position");
assert!(template.records()[0].tags().find_int(b"MQ").is_none(), "R1 should NOT have MQ");
assert!(template.records()[1].tags().find_int(b"MQ").is_some(), "R2 should have MQ");
assert!(template.records()[0].is_mate_unmapped(), "R1 mate_unmapped should be set");
assert!(!template.records()[1].is_mate_unmapped(), "R2 mate_unmapped should NOT be set");
assert_eq!(template.records()[0].template_length(), 0, "R1 TLEN");
assert_eq!(template.records()[1].template_length(), 0, "R2 TLEN");
assert!(template.records()[0].tags().find_string(b"MC").is_none(), "R1 NOT have MC");
assert!(template.records()[1].tags().find_string(b"MC").is_some(), "R2 should have MC");
Ok(())
}
#[test]
fn test_pair_orientation_fr_pair() -> Result<()> {
let r1 = create_mapped_record_with_flags(
b"read1",
FLAG_PAIRED | FLAG_READ1 | FLAG_MATE_REVERSE,
100,
30,
200, Some(0),
Some(200),
);
let r2 = create_mapped_record_with_flags(
b"read1",
FLAG_PAIRED | FLAG_READ2 | FLAG_REVERSE,
200,
30,
-200,
Some(0),
Some(100),
);
let template = Template::from_records(vec![r1, r2])?;
assert_eq!(template.pair_orientation(), Some(PairOrientation::FR));
Ok(())
}
#[test]
fn test_pair_orientation_rf_pair() -> Result<()> {
let r1 = create_mapped_record_with_flags(
b"read1",
FLAG_PAIRED | FLAG_READ1 | FLAG_MATE_REVERSE,
300,
30,
-200, Some(0),
Some(100),
);
let r2 = create_mapped_record_with_flags(
b"read1",
FLAG_PAIRED | FLAG_READ2 | FLAG_REVERSE,
100,
30,
200,
Some(0),
Some(300),
);
let template = Template::from_records(vec![r1, r2])?;
assert_eq!(template.pair_orientation(), Some(PairOrientation::RF));
Ok(())
}
#[test]
fn test_pair_orientation_tandem_both_forward() -> Result<()> {
let r1 = create_mapped_record_with_flags(
b"read1",
FLAG_PAIRED | FLAG_READ1, 100,
30,
100,
Some(0),
Some(200),
);
let r2 = create_mapped_record_with_flags(
b"read1",
FLAG_PAIRED | FLAG_READ2, 200,
30,
-100,
Some(0),
Some(100),
);
let template = Template::from_records(vec![r1, r2])?;
assert_eq!(template.pair_orientation(), Some(PairOrientation::Tandem));
Ok(())
}
#[test]
fn test_pair_orientation_tandem_both_reverse() -> Result<()> {
let r1 = create_mapped_record_with_flags(
b"read1",
FLAG_PAIRED | FLAG_READ1 | FLAG_REVERSE | FLAG_MATE_REVERSE,
100,
30,
100,
Some(0),
Some(200),
);
let r2 = create_mapped_record_with_flags(
b"read1",
FLAG_PAIRED | FLAG_READ2 | FLAG_REVERSE | FLAG_MATE_REVERSE,
200,
30,
-100,
Some(0),
Some(100),
);
let template = Template::from_records(vec![r1, r2])?;
assert_eq!(template.pair_orientation(), Some(PairOrientation::Tandem));
Ok(())
}
#[test]
fn test_pair_orientation_no_r1() -> Result<()> {
let r2 = create_mapped_record_with_flags(
b"read1",
FLAG_PAIRED | FLAG_READ2 | FLAG_REVERSE,
100,
30,
0,
Some(0),
Some(100),
);
let template = Template::from_records(vec![r2])?;
assert_eq!(template.pair_orientation(), None);
Ok(())
}
#[test]
fn test_pair_orientation_no_r2() -> Result<()> {
let r1 = create_mapped_record_with_flags(
b"read1",
FLAG_PAIRED | FLAG_READ1,
100,
30,
0,
Some(0),
Some(100),
);
let template = Template::from_records(vec![r1])?;
assert_eq!(template.pair_orientation(), None);
Ok(())
}
#[test]
fn test_pair_orientation_r1_unmapped() -> Result<()> {
let r1 = create_mapped_record_with_flags(
b"read1",
FLAG_PAIRED | FLAG_READ1 | FLAG_UNMAPPED,
100,
30,
0,
Some(0),
Some(100),
);
let r2 = create_mapped_record_with_flags(
b"read1",
FLAG_PAIRED | FLAG_READ2,
100,
30,
0,
Some(0),
Some(100),
);
let template = Template::from_records(vec![r1, r2])?;
assert_eq!(template.pair_orientation(), None);
Ok(())
}
#[test]
fn test_pair_orientation_r2_unmapped() -> Result<()> {
let r1 = create_mapped_record_with_flags(
b"read1",
FLAG_PAIRED | FLAG_READ1,
100,
30,
0,
Some(0),
Some(100),
);
let r2 = create_mapped_record_with_flags(
b"read1",
FLAG_PAIRED | FLAG_READ2 | FLAG_UNMAPPED,
100,
30,
0,
Some(0),
Some(100),
);
let template = Template::from_records(vec![r1, r2])?;
assert_eq!(template.pair_orientation(), None);
Ok(())
}
#[test]
fn test_pair_orientation_different_chromosomes() -> Result<()> {
let r1 = create_mapped_record_with_flags(
b"read1",
FLAG_PAIRED | FLAG_READ1 | FLAG_MATE_REVERSE,
100,
30,
200,
Some(0),
Some(200),
);
let mut b = RawSamBuilder::new();
b.read_name(b"read1")
.sequence(&[b'A'; 100])
.qualities(&[30u8; 100])
.flags(FLAG_PAIRED | FLAG_READ2 | FLAG_REVERSE)
.ref_id(1)
.pos(199)
.mapq(30)
.cigar_ops(&[fgumi_raw_bam::testutil::encode_op(0, 100)])
.template_length(-200)
.mate_ref_id(0)
.mate_pos(99);
let r2 = b.build();
let template = Template::from_records(vec![r1, r2])?;
assert_eq!(template.pair_orientation(), None);
Ok(())
}
#[test]
fn test_pair_orientation_fr_from_reverse_read() -> Result<()> {
let r1 = create_mapped_record_with_flags(
b"read1",
FLAG_PAIRED | FLAG_READ1 | FLAG_REVERSE, 200,
30,
-200, Some(0),
Some(100),
);
let r2 = create_mapped_record_with_flags(
b"read1",
FLAG_PAIRED | FLAG_READ2 | FLAG_MATE_REVERSE, 100,
30,
200,
Some(0),
Some(200),
);
let template = Template::from_records(vec![r1, r2])?;
assert_eq!(template.pair_orientation(), Some(PairOrientation::FR));
Ok(())
}
#[test]
fn test_from_records_truncated_header() {
let short = RawRecord::from(vec![0u8; 20]);
let err = Template::from_records(vec![short]).unwrap_err();
assert!(err.to_string().contains("too short"), "Error: {err}");
}
#[test]
fn test_from_records_truncated_read_name() {
let mut buf = vec![0u8; 34];
buf[8] = 10; let err = Template::from_records(vec![RawRecord::from(buf)]).unwrap_err();
assert!(err.to_string().contains("truncated"), "Error: {err}");
}
#[test]
fn test_write_with_offset_none() {
let mi = MoleculeId::None;
let mut buf = String::new();
let result = mi.write_with_offset(100, &mut buf);
assert!(result.is_empty());
}
#[test]
fn test_write_with_offset_single() {
let mi = MoleculeId::Single(5);
let mut buf = String::new();
let result = mi.write_with_offset(100, &mut buf);
assert_eq!(result, b"105");
}
#[test]
fn test_write_with_offset_paired_a() {
let mi = MoleculeId::PairedA(3);
let mut buf = String::new();
let result = mi.write_with_offset(10, &mut buf);
assert_eq!(result, b"13/A");
}
#[test]
fn test_write_with_offset_paired_b() {
let mi = MoleculeId::PairedB(3);
let mut buf = String::new();
let result = mi.write_with_offset(10, &mut buf);
assert_eq!(result, b"13/B");
}
#[test]
fn test_write_with_offset_reuses_buffer() {
let mut buf = String::new();
let mi1 = MoleculeId::Single(1);
let _ = mi1.write_with_offset(0, &mut buf);
assert_eq!(buf, "1");
let mi2 = MoleculeId::PairedA(99);
let result = mi2.write_with_offset(0, &mut buf);
assert_eq!(result, b"99/A");
assert_eq!(buf, "99/A");
}
#[test]
fn test_write_with_offset_matches_to_string() {
let mut buf = String::new();
for mi in [
MoleculeId::None,
MoleculeId::Single(0),
MoleculeId::Single(42),
MoleculeId::PairedA(7),
MoleculeId::PairedB(7),
] {
let expected = mi.to_string_with_offset(100);
let result = mi.write_with_offset(100, &mut buf);
assert_eq!(result, expected.as_bytes(), "Mismatch for {mi:?}");
}
}
}