use anyhow::{Context, Result};
use log::debug;
use std::collections::HashMap;
use std::io::Write;
use std::path::Path;
#[derive(Debug)]
pub struct BamStatResult {
pub total_records: u64,
pub qc_failed: u64,
pub duplicates: u64,
pub non_primary: u64,
pub unmapped: u64,
pub non_unique: u64,
pub unique: u64,
pub read_1: u64,
pub read_2: u64,
pub forward: u64,
pub reverse: u64,
pub splice: u64,
pub non_splice: u64,
pub proper_pairs: u64,
pub proper_pair_diff_chrom: u64,
pub secondary: u64,
pub supplementary: u64,
pub mapped: u64,
pub paired_flagstat: u64,
pub read1_flagstat: u64,
pub read2_flagstat: u64,
pub first_fragments: u64,
pub last_fragments: u64,
pub properly_paired: u64,
pub both_mapped: u64,
pub singletons: u64,
pub mate_diff_chr: u64,
pub mate_diff_chr_mapq5: u64,
pub chrom_counts: HashMap<i32, (u64, u64)>,
pub unplaced_unmapped: u64,
pub total_len: u64,
pub total_first_fragment_len: u64,
pub total_last_fragment_len: u64,
pub bases_mapped: u64,
pub bases_mapped_cigar: u64,
pub bases_duplicated: u64,
pub max_len: u64,
pub max_first_fragment_len: u64,
pub max_last_fragment_len: u64,
pub quality_sum: f64,
pub quality_count: u64,
pub mismatches: u64,
pub is_hist: HashMap<u64, [u64; 4]>,
pub inward_pairs: u64,
pub outward_pairs: u64,
pub other_orientation: u64,
pub primary_count: u64,
pub primary_mapped: u64,
pub primary_duplicates: u64,
pub reads_mq0: u64,
pub reads_mapped_and_paired: u64,
pub rl_hist: HashMap<u64, u64>,
pub frl_hist: HashMap<u64, u64>,
pub lrl_hist: HashMap<u64, u64>,
pub mapq_hist: [u64; 256],
pub ffq: Vec<[u64; 64]>,
pub lfq: Vec<[u64; 64]>,
pub gcf: [u64; 200],
pub gcl: [u64; 200],
#[allow(dead_code)]
pub fbc: Vec<[u64; 6]>,
#[allow(dead_code)]
pub lbc: Vec<[u64; 6]>,
pub fbc_ro: Vec<[u64; 6]>,
pub lbc_ro: Vec<[u64; 6]>,
pub gcc_rc: Vec<[u64; 4]>,
pub ftc: [u64; 5],
pub ltc: [u64; 5],
pub id_hist: HashMap<u64, [u64; 2]>,
pub ic: Vec<[u64; 4]>,
pub chk: [u32; 3],
pub cov_hist: HashMap<u32, u64>,
pub gcd_bins: Vec<GcDepthBin>,
}
#[derive(Debug, Clone)]
pub struct GcDepthBin {
pub gc: f32,
pub depth: u32,
}
impl Default for BamStatResult {
fn default() -> Self {
Self {
total_records: 0,
qc_failed: 0,
duplicates: 0,
non_primary: 0,
unmapped: 0,
non_unique: 0,
unique: 0,
read_1: 0,
read_2: 0,
forward: 0,
reverse: 0,
splice: 0,
non_splice: 0,
proper_pairs: 0,
proper_pair_diff_chrom: 0,
secondary: 0,
supplementary: 0,
mapped: 0,
paired_flagstat: 0,
read1_flagstat: 0,
read2_flagstat: 0,
first_fragments: 0,
last_fragments: 0,
properly_paired: 0,
both_mapped: 0,
singletons: 0,
mate_diff_chr: 0,
mate_diff_chr_mapq5: 0,
chrom_counts: HashMap::new(),
unplaced_unmapped: 0,
total_len: 0,
total_first_fragment_len: 0,
total_last_fragment_len: 0,
bases_mapped: 0,
bases_mapped_cigar: 0,
bases_duplicated: 0,
max_len: 0,
max_first_fragment_len: 0,
max_last_fragment_len: 0,
quality_sum: 0.0,
quality_count: 0,
mismatches: 0,
is_hist: HashMap::new(),
inward_pairs: 0,
outward_pairs: 0,
other_orientation: 0,
primary_count: 0,
primary_mapped: 0,
primary_duplicates: 0,
reads_mq0: 0,
reads_mapped_and_paired: 0,
rl_hist: HashMap::new(),
frl_hist: HashMap::new(),
lrl_hist: HashMap::new(),
mapq_hist: [0u64; 256],
ffq: Vec::new(),
lfq: Vec::new(),
gcf: [0u64; 200],
gcl: [0u64; 200],
fbc: Vec::new(),
lbc: Vec::new(),
fbc_ro: Vec::new(),
lbc_ro: Vec::new(),
gcc_rc: Vec::new(),
ftc: [0u64; 5],
ltc: [0u64; 5],
id_hist: HashMap::new(),
ic: Vec::new(),
chk: [0u32; 3],
cov_hist: HashMap::new(),
gcd_bins: Vec::new(),
}
}
}
pub fn write_bam_stat(result: &BamStatResult, output_path: &Path) -> Result<()> {
let mut out = std::fs::File::create(output_path)
.with_context(|| format!("Failed to create output file: {}", output_path.display()))?;
writeln!(out, "\n#==================================================")?;
writeln!(out, "#All numbers are READ count")?;
writeln!(out, "#==================================================\n")?;
writeln!(out, "{:<40}{}\n", "Total records:", result.total_records)?;
writeln!(out, "{:<40}{}", "QC failed:", result.qc_failed)?;
writeln!(out, "{:<40}{}", "Optical/PCR duplicate:", result.duplicates)?;
writeln!(out, "{:<40}{}", "Non primary hits", result.non_primary)?;
writeln!(out, "{:<40}{}", "Unmapped reads:", result.unmapped)?;
writeln!(
out,
"{:<40}{}\n",
"mapq < mapq_cut (non-unique):", result.non_unique
)?;
writeln!(out, "{:<40}{}", "mapq >= mapq_cut (unique):", result.unique)?;
writeln!(out, "{:<40}{}", "Read-1:", result.read_1)?;
writeln!(out, "{:<40}{}", "Read-2:", result.read_2)?;
writeln!(out, "{:<40}{}", "Reads map to '+':", result.forward)?;
writeln!(out, "{:<40}{}", "Reads map to '-':", result.reverse)?;
writeln!(out, "{:<40}{}", "Non-splice reads:", result.non_splice)?;
writeln!(out, "{:<40}{}", "Splice reads:", result.splice)?;
writeln!(
out,
"{:<40}{}",
"Reads mapped in proper pairs:", result.proper_pairs
)?;
writeln!(
out,
"Proper-paired reads map to different chrom:{}",
result.proper_pair_diff_chrom
)?;
debug!("Wrote bam_stat output to {}", output_path.display());
Ok(())
}
#[cfg(test)]
mod tests {
use crate::rna::rseqc::accumulators::BamStatAccum;
use rust_htslib::bam::{self, Read as BamRead};
#[test]
fn test_bam_stat_small() {
let mut reader =
bam::Reader::from_path("tests/data/test.bam").expect("Failed to open test.bam");
let mut accum = BamStatAccum::default();
let mut record = bam::Record::new();
while let Some(res) = reader.read(&mut record) {
res.expect("Error reading BAM record");
accum.process_read(&record, 30);
}
let r = accum.into_result();
assert!(r.total_records > 0, "Should have some records");
assert_eq!(
r.total_records,
r.qc_failed + r.duplicates + r.non_primary + r.unmapped + r.non_unique + r.unique,
"Counts should sum to total"
);
}
}