#![allow(clippy::cast_possible_truncation)]
#![allow(clippy::cast_sign_loss)]
use std::num::NonZero;
use std::path::Path;
use rsomics_bamio::raw::{self, RawRecord};
use rsomics_common::{Result, RsomicsError};
const CIGAR_MATCH: u8 = 0;
const CIGAR_DELETION: u8 = 2;
const CIGAR_SKIP: u8 = 3;
const CIGAR_SEQ_MATCH: u8 = 7;
const CIGAR_SEQ_MISMATCH: u8 = 8;
#[derive(Debug, Clone, Copy, Default)]
pub struct BinFilter {
pub skip_flags: u16,
pub min_mapq: u8,
}
#[derive(Debug, Clone)]
pub struct ChromBins {
pub name: String,
pub chrom_len: u64,
pub bins: Vec<u32>,
}
#[derive(Debug, Clone)]
pub struct BinnedCoverage {
pub chroms: Vec<ChromBins>,
pub total_binned: u64,
pub total_mapped: u64,
}
impl BinnedCoverage {
#[must_use]
pub fn total_signal(&self) -> u64 {
self.chroms
.iter()
.map(|c| c.bins.iter().map(|&b| u64::from(b)).sum::<u64>())
.sum()
}
}
pub fn compute_coverage(
input: &Path,
bin_size: u32,
filter: BinFilter,
workers: NonZero<usize>,
) -> Result<BinnedCoverage> {
let mut reader = rsomics_bamio::open_with_workers(input, workers)?;
let header = reader.read_header().map_err(RsomicsError::Io)?;
let bin_size = u64::from(bin_size);
let mut chroms: Vec<ChromBins> = header
.reference_sequences()
.iter()
.map(|(name, seq)| {
let len = usize::from(seq.length()) as u64;
let n_bins = len.div_ceil(bin_size) as usize;
ChromBins {
name: name.to_string(),
chrom_len: len,
bins: vec![0u32; n_bins],
}
})
.collect();
let mut total_binned: u64 = 0;
let mut total_mapped: u64 = 0;
let mut record = RawRecord::default();
while raw::read_record(reader.get_mut(), &mut record)? != 0 {
let flags = record.flags();
if flags & 0x4 != 0 {
continue;
}
if record.reference_sequence_id() < 0 {
continue;
}
if filter.skip_flags != 0 && (flags & filter.skip_flags) != 0 {
continue;
}
if filter.min_mapq > 0 && record.mapping_quality() < filter.min_mapq {
continue;
}
total_mapped += 1;
let tid = record.reference_sequence_id() as usize;
let Some(chrom) = chroms.get_mut(tid) else {
continue;
};
let start0 = record.alignment_start() as u64;
let ref_len: u64 = record
.cigar_ops()
.filter_map(|(kind, len)| match kind {
CIGAR_MATCH | CIGAR_DELETION | CIGAR_SKIP | CIGAR_SEQ_MATCH
| CIGAR_SEQ_MISMATCH => Some(u64::from(len)),
_ => None,
})
.sum();
if ref_len == 0 {
continue;
}
let frag_end = start0 + ref_len;
let s_idx = (start0 / bin_size) as usize;
let e_idx = (frag_end.div_ceil(bin_size) as usize).min(chrom.bins.len());
if s_idx >= chrom.bins.len() {
continue;
}
for b in &mut chrom.bins[s_idx..e_idx] {
*b = b.saturating_add(1);
}
total_binned += 1;
}
Ok(BinnedCoverage {
chroms,
total_binned,
total_mapped,
})
}