#![allow(
clippy::arithmetic_side_effects,
clippy::cast_lossless,
clippy::cast_possible_truncation,
clippy::indexing_slicing,
clippy::print_stdout,
reason = "example"
)]
use anyhow::Context;
use clap::Parser as _;
use seqair::bam::record_store::{CustomizeRecordStore, RecordStore, SlimRecord};
use seqair::reader::{Readers, SegmentOptions};
use seqair_types::RegionString;
use seqair_types::SmolStr;
use std::num::NonZeroU32;
use std::path::PathBuf;
#[derive(Debug, clap::Parser)]
struct Cli {
input: PathBuf,
reference: PathBuf,
#[clap(long, short)]
region: RegionString,
#[clap(long, default_value_t = 20)]
min_mapq: u8,
}
#[derive(Debug, Clone)]
struct ReadInfo {
read_group: Option<SmolStr>,
aligned_fraction: f64,
}
#[derive(Debug, Clone, Default)]
struct ReadInfoBuilder;
impl CustomizeRecordStore for ReadInfoBuilder {
type Extra = ReadInfo;
fn filter(&mut self, rec: &SlimRecord, _: &RecordStore<ReadInfo>) -> bool {
!rec.flags.is_unmapped() && !rec.flags.is_secondary()
}
fn compute(&mut self, rec: &SlimRecord, store: &RecordStore<ReadInfo>) -> ReadInfo {
let read_group: Option<SmolStr> =
if let Ok(aux) = rec.aux(store) { aux.get("RG").ok() } else { None };
let aligned_fraction =
if rec.seq_len > 0 { rec.matching_bases as f64 / rec.seq_len as f64 } else { 0.0 };
ReadInfo { read_group, aligned_fraction }
}
}
fn main() -> anyhow::Result<()> {
let args = Cli::parse();
let mut readers = Readers::open_customized(&args.input, &args.reference, ReadInfoBuilder)
.context("could not open BAM + FASTA")?;
let min_mapq = args.min_mapq;
let max_len = NonZeroU32::new(100_000).expect("non-zero literal");
let plan: Vec<_> = readers.segments(&args.region, SegmentOptions::new(max_len))?.collect();
eprintln!(
"Loaded pileup for {region} as {n} segment(s)",
n = plan.len(),
region = args.region.chromosome,
);
println!("pos\tdepth\tref\tread_groups\tmean_aligned_frac");
for segment in &plan {
let mut engine = readers.pileup(segment).context("could not build pileup")?;
let core = segment.core_range();
while let Some(column) = engine.pileups() {
if !core.contains(&column.pos()) {
continue;
}
let depth = column.depth();
if depth == 0 {
continue;
}
let mut rg_counts: Vec<(&str, u32)> = Vec::new();
let mut aligned_sum = 0.0;
let mut counted = 0u32;
for aln in column.alignments() {
if aln.mapq < min_mapq {
continue;
}
let info = aln.extra();
aligned_sum += info.aligned_fraction;
counted += 1;
let rg = info.read_group.as_deref().unwrap_or(".");
if let Some(entry) = rg_counts.iter_mut().find(|(name, _)| *name == rg) {
entry.1 += 1;
} else {
rg_counts.push((rg, 1));
}
}
if counted == 0 {
continue;
}
rg_counts.sort_by_key(|b| std::cmp::Reverse(b.1));
let rg_summary: Vec<String> =
rg_counts.iter().map(|(name, count)| format!("{name}:{count}")).collect();
let pos1 = column.pos().to_one_based().context("position overflow")?;
println!(
"{pos}\t{depth}\t{ref_base}\t{rgs}\t{mean:.3}",
pos = *pos1,
ref_base = column.reference_base().as_char(),
rgs = rg_summary.join(","),
mean = aligned_sum / counted as f64,
);
}
}
Ok(())
}