#![allow(
clippy::arithmetic_side_effects,
clippy::cast_possible_truncation,
clippy::indexing_slicing,
clippy::print_stdout,
reason = "example"
)]
use anyhow::Context;
use clap::Parser as _;
use seqair::{
bam::{BamWriterBuilder, CigarOp, CigarStr, Pos0, RecordStore, cigar::CigarOpType},
reader::{IndexedReader, ResolveTid},
};
use seqair_types::{RegionString, SmolStr, smol_str::ToSmolStr};
use std::path::PathBuf;
#[derive(Debug, clap::Parser)]
struct Cli {
input: PathBuf,
#[clap(long, short)]
region: Option<RegionString>,
#[clap(long, default_value_t = 2)]
clip: u32,
#[clap(long, default_value_t = 4)]
min_match: u32,
#[clap(long, default_value_t = 5)]
show: usize,
#[clap(long, short)]
output: Option<PathBuf>,
}
fn main() -> anyhow::Result<()> {
let args = Cli::parse();
let mut reader = IndexedReader::open(&args.input).context("could not open alignment file")?;
let (tid, start, end, contig) = resolve_region(args.region.as_ref(), reader.header())?;
let mut store = RecordStore::new();
reader
.fetch_into(tid, start, end, &mut store)
.with_context(|| format!("fetch failed for {contig}:{}-{}", *start, *end))?;
println!("fetched {} record(s) from {contig}:{}-{}", store.len(), *start, *end);
let mut plan: Vec<(u32, Pos0, Vec<CigarOp>)> = Vec::new();
for idx in 0..store.len() as u32 {
let rec = store.record(idx);
if rec.flags.is_unmapped() || rec.tid < 0 {
continue;
}
let Some((new_pos, new_cigar)) =
propose_realignment(store.cigar(idx), rec.pos, args.clip, args.min_match)
else {
continue;
};
plan.push((idx, new_pos, new_cigar));
}
let total_planned = plan.len();
let mut applied = 0usize;
for (i, (idx, new_pos, new_cigar)) in plan.iter().enumerate() {
let before = snapshot(&store, *idx);
store
.set_alignment(*idx, *new_pos, new_cigar)
.with_context(|| format!("set_alignment failed for record {idx}"))?;
applied += 1;
if i < args.show {
let after = snapshot(&store, *idx);
println!(
" [{idx}] {:?} {} @ {} -> {} @ {}",
before.qname, before.cigar_str, *before.pos, after.cigar_str, *after.pos,
);
}
}
store.sort_by_pos();
let skipped = total_planned.saturating_sub(applied);
println!("realigned {applied} record(s); skipped {skipped}; store now sorted and ready");
if let Some(ref out_path) = args.output {
write_store(&store, reader.header(), out_path)?;
println!("wrote {} record(s) to {}", store.len(), out_path.display());
}
Ok(())
}
fn resolve_region(
region: Option<&RegionString>,
header: &seqair::bam::BamHeader,
) -> anyhow::Result<(u32, Pos0, Pos0, String)> {
match region {
Some(r) => {
let tid = r.chromosome.as_str().resolve_tid(header)?.as_u32();
let len = header.target_len(tid).context("contig has no length")? as u32;
let start = match r.start {
Some(p) => p.to_zero_based(),
None => Pos0::new(0).expect("0 is valid"),
};
let end = match r.end {
Some(p) => p.to_zero_based(),
None => Pos0::new(len).context("contig length out of range")?,
};
Ok((tid, start, end, r.chromosome.to_string()))
}
None => {
let tid = 0u32;
let len = header.target_len(tid).context("empty header")? as u32;
let contig = header.target_name(tid).context("unknown tid")?.to_owned();
let start = Pos0::new(0).expect("0 is valid");
let end = Pos0::new(len).context("contig length out of range")?;
Ok((tid, start, end, contig))
}
}
}
fn propose_realignment(
cigar: &[CigarOp],
pos: Pos0,
clip: u32,
min_match: u32,
) -> Option<(Pos0, Vec<CigarOp>)> {
let first = *cigar.first()?;
if clip == 0 || !matches!(first.op_type(), CigarOpType::Match) {
return None;
}
let first_len = first.len();
if first_len < min_match || first_len <= clip {
return None;
}
let new_match_len = first_len - clip;
let new_pos = Pos0::new((*pos).checked_add(clip)?)?;
let mut out = Vec::with_capacity(cigar.len() + 1);
out.push(CigarOp::new(CigarOpType::SoftClip, clip));
out.push(CigarOp::new(CigarOpType::Match, new_match_len));
out.extend_from_slice(&cigar[1..]);
Some((new_pos, out))
}
struct Snapshot {
qname: String,
pos: Pos0,
cigar_str: SmolStr,
}
fn snapshot(store: &RecordStore, idx: u32) -> Snapshot {
let rec = store.record(idx);
let qname = std::str::from_utf8(store.qname(idx)).unwrap_or("<non-utf8>").to_owned();
Snapshot { qname, pos: rec.pos, cigar_str: CigarStr(store.cigar(idx)).to_smolstr() }
}
fn write_store(
store: &RecordStore,
header: &seqair::bam::BamHeader,
path: &std::path::Path,
) -> anyhow::Result<()> {
let mut writer =
BamWriterBuilder::to_path(path, header).build().context("could not create BAM writer")?;
for i in 0..store.len() as u32 {
writer
.write_store_record(store, i)
.with_context(|| format!("could not write record {i}"))?;
}
writer.finish().context("could not finalize BAM")?;
Ok(())
}