use std::collections::HashMap;
use super::index::{MergedGeneModel, TranscriptInfo};
#[derive(Debug, Clone, Default)]
pub struct TranscriptCoverage {
coverage: HashMap<u32, Vec<i32>>,
}
impl TranscriptCoverage {
pub fn new() -> Self {
Self::default()
}
pub fn add_coverage(
&mut self,
m_blocks: &[(i32, i32)],
transcripts: &[TranscriptInfo],
tx_flat_offset: u32,
) {
for (local_idx, tx_info) in transcripts.iter().enumerate() {
let flat_idx = tx_flat_offset + local_idx as u32;
let tx_len = tx_info.exonic_length as usize;
if tx_len == 0 {
continue;
}
let depth = self
.coverage
.entry(flat_idx)
.or_insert_with(|| vec![0i32; tx_len]);
for &(block_start, block_end) in m_blocks {
let effective_end = block_end - 1;
let mut exonic_offset = 0usize;
for &(exon_start, exon_end) in &tx_info.exons {
let exon_len = (exon_end - exon_start) as usize;
let overlap_start = block_start.max(exon_start);
let overlap_end = effective_end.min(exon_end);
if overlap_start < overlap_end {
let rel_start = exonic_offset + (overlap_start - exon_start) as usize;
let rel_end =
(exonic_offset + (overlap_end - exon_start) as usize).min(tx_len);
if rel_start < tx_len && rel_start < rel_end {
for d in &mut depth[rel_start..rel_end] {
*d += 1;
}
}
}
exonic_offset += exon_len;
}
}
}
}
pub fn merge(&mut self, other: TranscriptCoverage) {
for (tx_idx, other_depth) in other.coverage {
let entry = self.coverage.entry(tx_idx);
match entry {
std::collections::hash_map::Entry::Occupied(mut e) => {
let self_depth = e.get_mut();
for (i, &val) in other_depth.iter().enumerate() {
if i < self_depth.len() {
self_depth[i] += val;
}
}
}
std::collections::hash_map::Entry::Vacant(e) => {
e.insert(other_depth);
}
}
}
}
#[allow(dead_code)]
pub fn get(&self, tx_flat_idx: u32) -> Option<&[i32]> {
self.coverage.get(&tx_flat_idx).map(|v| v.as_slice())
}
#[allow(dead_code)]
pub fn num_transcripts_with_coverage(&self) -> usize {
self.coverage.len()
}
#[allow(dead_code)]
pub fn mean_coverage(&self, tx_flat_idx: u32) -> f64 {
match self.coverage.get(&tx_flat_idx) {
Some(depth) if !depth.is_empty() => {
let sum: i64 = depth.iter().map(|&d| d as i64).sum();
sum as f64 / depth.len() as f64
}
_ => 0.0,
}
}
}
#[derive(Debug, Clone, Default)]
pub struct MergedGeneCoverage {
coverage: HashMap<u32, Vec<i32>>,
}
#[allow(dead_code)] impl MergedGeneCoverage {
pub fn new() -> Self {
Self::default()
}
pub fn add_coverage(
&mut self,
gene_idx: u32,
m_blocks: &[(i32, i32)],
model: &MergedGeneModel,
) {
let total_len: usize = model.exons.iter().map(|(s, e)| (e - s) as usize).sum();
if total_len == 0 {
return;
}
let depth = self
.coverage
.entry(gene_idx)
.or_insert_with(|| vec![0i32; total_len]);
for &(block_start, block_end) in m_blocks {
let effective_end = block_end - 1;
let mut exonic_offset = 0usize;
for &(exon_start, exon_end) in &model.exons {
let exon_len = (exon_end - exon_start) as usize;
let overlap_start = block_start.max(exon_start);
let overlap_end = effective_end.min(exon_end);
if overlap_start < overlap_end {
let rel_start = exonic_offset + (overlap_start - exon_start) as usize;
let rel_end =
(exonic_offset + (overlap_end - exon_start) as usize).min(total_len);
if rel_start < total_len && rel_start < rel_end {
for d in &mut depth[rel_start..rel_end] {
*d += 1;
}
}
}
exonic_offset += exon_len;
}
}
}
#[allow(dead_code)]
pub fn merge(&mut self, other: MergedGeneCoverage) {
for (gene_idx, other_depth) in other.coverage {
let entry = self.coverage.entry(gene_idx);
match entry {
std::collections::hash_map::Entry::Occupied(mut e) => {
let self_depth = e.get_mut();
for (i, &val) in other_depth.iter().enumerate() {
if i < self_depth.len() {
self_depth[i] += val;
}
}
}
std::collections::hash_map::Entry::Vacant(e) => {
e.insert(other_depth);
}
}
}
}
#[allow(dead_code)]
pub fn get(&self, gene_idx: u32) -> Option<&[i32]> {
self.coverage.get(&gene_idx).map(|v| v.as_slice())
}
#[allow(dead_code)]
pub fn iter(&self) -> impl Iterator<Item = (u32, &[i32])> {
self.coverage.iter().map(|(&idx, v)| (idx, v.as_slice()))
}
#[allow(dead_code)]
pub fn num_genes_with_coverage(&self) -> usize {
self.coverage.len()
}
}
#[cfg(test)]
mod tests {
use super::*;
fn make_tx_info(start: i32, end: i32) -> TranscriptInfo {
TranscriptInfo {
gene_idx: 0,
transcript_idx: 0,
gene_id: "g1".to_string(),
transcript_id: "t1".to_string(),
chrom: "chr1".to_string(),
start,
end,
strand: '+',
length: (end - start) as u32,
exons: vec![(start, end)],
exonic_length: (end - start) as u32,
}
}
#[test]
fn test_add_coverage_single_block() {
let mut cov = TranscriptCoverage::new();
let tx = make_tx_info(100, 200);
cov.add_coverage(&[(120, 150)], &[tx], 0);
let depth = cov.get(0).unwrap();
assert_eq!(depth.len(), 100);
assert_eq!(depth[19], 0);
assert_eq!(depth[20], 1);
assert_eq!(depth[48], 1);
assert_eq!(depth[49], 0);
}
#[test]
fn test_add_coverage_clipping() {
let mut cov = TranscriptCoverage::new();
let tx = make_tx_info(100, 200);
cov.add_coverage(&[(180, 220)], &[tx], 0);
let depth = cov.get(0).unwrap();
assert_eq!(depth[79], 0);
assert_eq!(depth[80], 1);
assert_eq!(depth[99], 1);
}
#[test]
fn test_merge() {
let mut cov1 = TranscriptCoverage::new();
let tx = make_tx_info(100, 200);
cov1.add_coverage(&[(120, 150)], std::slice::from_ref(&tx), 0);
let mut cov2 = TranscriptCoverage::new();
cov2.add_coverage(&[(130, 160)], &[tx], 0);
cov1.merge(cov2);
let depth = cov1.get(0).unwrap();
assert_eq!(depth[25], 1);
assert_eq!(depth[35], 2);
assert_eq!(depth[55], 1);
}
#[test]
fn test_mean_coverage() {
let mut cov = TranscriptCoverage::new();
let tx = make_tx_info(0, 10);
cov.add_coverage(&[(0, 10)], &[tx], 0);
let mean = cov.mean_coverage(0);
assert!((mean - 0.9).abs() < 1e-10);
assert!((cov.mean_coverage(99) - 0.0).abs() < 1e-10);
}
#[test]
fn test_merged_gene_single_exon() {
let model = MergedGeneModel {
strand: '+',
exons: vec![(100, 200)],
};
let mut cov = MergedGeneCoverage::new();
cov.add_coverage(0, &[(120, 150)], &model);
let depth = cov.get(0).unwrap();
assert_eq!(depth.len(), 100);
assert_eq!(depth[19], 0);
assert_eq!(depth[20], 1);
assert_eq!(depth[48], 1);
assert_eq!(depth[49], 0);
}
#[test]
fn test_merged_gene_two_exons() {
let model = MergedGeneModel {
strand: '+',
exons: vec![(100, 200), (300, 400)],
};
let mut cov = MergedGeneCoverage::new();
cov.add_coverage(0, &[(310, 330)], &model);
let depth = cov.get(0).unwrap();
assert_eq!(depth.len(), 200);
assert_eq!(depth[109], 0);
assert_eq!(depth[110], 1);
assert_eq!(depth[128], 1);
assert_eq!(depth[129], 0);
}
#[test]
fn test_merged_gene_overlapping_transcripts() {
let model = MergedGeneModel::from_transcripts(
'+',
&[(100, 200), (300, 400), (150, 250), (350, 400)],
);
assert_eq!(model.exons, vec![(100, 250), (300, 400)]);
let mut cov = MergedGeneCoverage::new();
cov.add_coverage(0, &[(180, 220)], &model);
let depth = cov.get(0).unwrap();
assert_eq!(depth.len(), 250);
assert_eq!(depth[79], 0);
assert_eq!(depth[80], 1);
assert_eq!(depth[118], 1);
assert_eq!(depth[119], 0);
}
#[test]
fn test_merged_gene_merge() {
let model = MergedGeneModel {
strand: '+',
exons: vec![(100, 200)],
};
let mut cov1 = MergedGeneCoverage::new();
cov1.add_coverage(0, &[(120, 150)], &model);
let mut cov2 = MergedGeneCoverage::new();
cov2.add_coverage(0, &[(130, 160)], &model);
cov1.merge(cov2);
let depth = cov1.get(0).unwrap();
assert_eq!(depth[25], 1);
assert_eq!(depth[35], 2);
assert_eq!(depth[55], 1);
}
}