#![allow(
clippy::arithmetic_side_effects,
clippy::cast_possible_truncation,
clippy::doc_markdown,
clippy::indexing_slicing,
clippy::print_stdout,
reason = "example"
)]
use anyhow::Context;
use clap::Parser as _;
use seqair::bam::{AlignedPair, AlignedPairWithRef, MatchKind, RecordStore, RefSeq};
use seqair::reader::{Readers, ResolveTid};
use seqair_types::{Base, RegionString};
use std::path::PathBuf;
use std::rc::Rc;
#[derive(Debug, clap::Parser)]
struct Cli {
#[clap(long, short)]
input: PathBuf,
#[clap(long, short)]
reference: PathBuf,
#[clap(long, short)]
region: RegionString,
#[clap(long, default_value_t = 20)]
min_qual: u8,
}
#[derive(Default, Debug)]
struct Counts {
records: u32,
methylation_calls: u32,
snv_evidence: u32,
insertions: u32,
deletions: u32,
inserted_bases: u32,
deleted_bases: u32,
}
fn main() -> anyhow::Result<()> {
let args = Cli::parse();
let mut readers =
Readers::open(&args.input, &args.reference).context("could not open BAM + FASTA")?;
let tid = args.region.chromosome.as_str().resolve_tid(readers.header())?.as_u32();
let start = args
.region
.start
.context("region needs an explicit start (e.g. chr1:1000-2000)")?
.to_zero_based();
let end = args
.region
.end
.context("region needs an explicit end (e.g. chr1:1000-2000)")?
.to_zero_based();
let mut store = RecordStore::<()>::new();
readers.fetch_into(tid, start, end, &mut store)?;
let ref_bases: Rc<[Base]> =
readers.fetch_base_seq(args.region.chromosome.as_str(), start, end)?;
let ref_seq = RefSeq::new(ref_bases, start);
let mut counts = Counts::default();
for idx in 0..store.len() as u32 {
let rec = store.record(idx);
if rec.flags.is_unmapped()
|| rec.flags.is_secondary()
|| rec.flags.is_supplementary()
|| rec.flags.is_failed_qc()
|| rec.flags.is_duplicate()
{
continue;
}
counts.records += 1;
for pair in rec.aligned_pairs(&store)? {
match pair {
AlignedPair::Insertion { insert_len, .. } => {
counts.insertions += 1;
counts.inserted_bases += insert_len;
}
AlignedPair::Deletion { del_len, .. } => {
counts.deletions += 1;
counts.deleted_bases += del_len;
}
_ => {}
}
}
for ev in rec.aligned_pairs_with_read(&store)?.with_reference(&ref_seq) {
let AlignedPairWithRef::Match {
kind: _, query,
ref_base: Some(rb),
qual,
..
} = ev
else {
continue;
};
if rb == Base::C && query == Base::T && qual.get().unwrap_or(0) >= args.min_qual {
counts.methylation_calls += 1;
}
if query != rb && query != Base::Unknown && rb != Base::Unknown {
counts.snv_evidence += 1;
}
}
}
println!("region: {}", args.region);
println!("records walked: {}", counts.records);
println!(
"methylation calls: {} (ref C + query T, q≥{})",
counts.methylation_calls, args.min_qual
);
println!("SNV evidence (matches): {}", counts.snv_evidence);
println!("insertions: {} ({} bases)", counts.insertions, counts.inserted_bases);
println!("deletions: {} ({} bases)", counts.deletions, counts.deleted_bases);
Ok(())
}
#[allow(dead_code, reason = "showcase function")]
fn count_indels_per_record(
rec: &seqair::bam::record_store::SlimRecord,
store: &RecordStore<()>,
) -> anyhow::Result<(u32, u32)> {
let mut ins = 0u32;
let mut del = 0u32;
for pair in rec.aligned_pairs(store)? {
match pair {
AlignedPair::Insertion { .. } => ins += 1,
AlignedPair::Deletion { .. } => del += 1,
_ => {}
}
}
Ok((ins, del))
}
#[allow(dead_code, reason = "showcase function")]
fn collect_insertions(
rec: &seqair::bam::record_store::SlimRecord,
store: &RecordStore<()>,
) -> anyhow::Result<Vec<(u32, Vec<Base>)>> {
use seqair::bam::AlignedPairWithRead;
let mut out = Vec::new();
for ev in rec.aligned_pairs_with_read(store)? {
if let AlignedPairWithRead::Insertion { qpos, query, .. } = ev {
out.push((qpos, query.to_vec()));
}
}
Ok(out)
}
#[allow(dead_code, reason = "showcase function")]
fn count_explicit_mismatches(
rec: &seqair::bam::record_store::SlimRecord,
store: &RecordStore<()>,
) -> anyhow::Result<u32> {
let mut count = 0u32;
for pair in rec.aligned_pairs(store)? {
if let AlignedPair::Match { kind: MatchKind::SeqMismatch, .. } = pair {
count += 1;
}
}
Ok(count)
}