#![allow(
clippy::arithmetic_side_effects,
clippy::cast_possible_truncation,
clippy::indexing_slicing,
reason = "example"
)]
use anyhow::Context;
use clap::Parser as _;
use seqair::Readers;
use seqair::bam::RecordStore;
use seqair::bam::pileup::PileupOp;
use seqair::reader::{Segment, SegmentOptions};
use seqair_types::{Base, RegionString};
use std::collections::HashMap;
use std::num::NonZeroU32;
use std::{
io::{BufWriter, Write},
path::PathBuf,
};
#[derive(Debug, clap::Parser)]
struct Cli {
input: PathBuf,
#[clap(long, short = 'f')]
reference: PathBuf,
#[clap(long, short)]
region: Option<RegionString>,
#[clap(long, short)]
out: Option<PathBuf>,
#[clap(long)]
samtools_compat: bool,
}
const MAX_TILE_LEN: u32 = 1_000_000;
fn main() -> anyhow::Result<()> {
let args = Cli::parse();
let mut readers =
Readers::open(&args.input, &args.reference).context("could not open alignment file")?;
let mut output: Box<dyn Write> = if let Some(out) = &args.out {
let file =
std::fs::File::create(out).with_context(|| format!("could not create {out:?}"))?;
Box::new(BufWriter::new(file))
} else {
Box::new(BufWriter::new(std::io::stdout().lock()))
};
let max_len = NonZeroU32::new(MAX_TILE_LEN).expect("non-zero literal");
let opts = SegmentOptions::new(max_len);
let plan: Vec<Segment> = if let Some(region) = args.region.as_ref() {
readers.segments(region, opts).context("could not plan region")?.collect()
} else {
readers.segments((), opts).context("could not plan whole-genome scan")?.collect()
};
let mut query_end_cache: HashMap<u32, u32> = HashMap::new();
let mut bases = String::new();
let mut quals = String::new();
for segment in &plan {
let mut engine = readers
.pileup(segment)
.with_context(|| format!("pileup failed for {}", segment.contig()))?;
if args.samtools_compat {
query_end_cache.clear();
let store = engine.store();
for i in 0..store.len() as u32 {
let rec = store.record(i);
let qend = query_end_pos(store, i, *rec.pos);
query_end_cache.insert(i, qend);
}
}
let contig_name = segment.contig().clone();
while let Some(column) = engine.pileups() {
let pos = column.pos();
let pos1 = pos.to_one_based().context("position overflow")?;
let ref_base = column.reference_base();
let store = column.store();
let mut depth = 0u32;
bases.clear();
quals.clear();
for aln in column.alignments() {
if args.samtools_compat
&& aln.qpos().is_none()
&& let Some(&qend) = query_end_cache.get(&aln.record_idx())
&& *pos >= qend
{
continue;
}
depth += 1;
let rec = store.record(aln.record_idx());
let is_reverse = aln.flags.is_reverse();
if pos == rec.pos {
bases.push('^');
#[allow(
clippy::cast_possible_truncation,
reason = "mapq is u8, +33 fits in u8"
)]
bases.push((aln.mapq.saturating_add(33)) as char);
}
format_alignment(&mut bases, &aln.op, ref_base, is_reverse, || {
read_inserted_bases(store, aln.record_idx(), &aln.op)
});
let read_end = if args.samtools_compat {
query_end_cache.get(&aln.record_idx()).copied().unwrap_or(*rec.end_pos)
} else {
*rec.end_pos
};
if *pos + 1 >= read_end {
bases.push('$');
}
match aln.qual().and_then(|q| q.get()) {
Some(q) => quals.push((q.saturating_add(33)) as char),
None => quals.push('~'),
}
}
if depth > 0 {
writeln!(
output,
"{contig_name}\t{}\t{}\t{depth}\t{bases}\t{quals}",
*pos1,
ref_base.as_char(),
)?;
}
}
}
output.flush()?;
Ok(())
}
fn query_end_pos<U>(store: &RecordStore<U>, record_idx: u32, record_pos: u32) -> u32 {
let cigar = store.cigar(record_idx);
let mut trailing_ref_len = 0u32;
for op in cigar.iter().rev() {
match op.op_code() {
2 | 3 => trailing_ref_len += op.len(), 5 | 6 => {} _ => break, }
}
let mut rlen = 0u32;
for op in cigar {
if matches!(op.op_code(), 0 | 2 | 3 | 7 | 8) {
rlen += op.len();
}
}
record_pos + rlen - trailing_ref_len
}
fn read_inserted_bases<U>(
store: &RecordStore<U>,
record_idx: u32,
op: &PileupOp,
) -> Option<Vec<u8>> {
let (qpos, insert_len) = match op {
PileupOp::Insertion { qpos, insert_len, .. } if *insert_len > 0 => (*qpos, *insert_len),
_ => return None,
};
let start = qpos as usize + 1;
let seq = store.seq(record_idx);
let end = (start + insert_len as usize).min(seq.len());
Some(seq.get(start..end)?.iter().map(|b| *b as u8).collect())
}
fn format_alignment(
buf: &mut String,
op: &PileupOp,
ref_base: Base,
is_reverse: bool,
get_ins_bases: impl FnOnce() -> Option<Vec<u8>>,
) {
use std::fmt::Write;
fn push_base(buf: &mut String, base: Base, ref_base: Base, is_reverse: bool) {
if base == ref_base && ref_base != Base::Unknown {
buf.push(if is_reverse { ',' } else { '.' });
} else {
let ch = base.as_char();
if is_reverse {
buf.push(ch.to_ascii_lowercase());
} else {
buf.push(ch.to_ascii_uppercase());
}
}
}
match op {
PileupOp::Match { base, .. } => push_base(buf, *base, ref_base, is_reverse),
PileupOp::Insertion { base, insert_len, .. } => {
push_base(buf, *base, ref_base, is_reverse);
let _ = write!(buf, "+{insert_len}");
if let Some(bases) = get_ins_bases() {
for b in &bases {
let ch = *b as char;
buf.push(if is_reverse { ch.to_ascii_lowercase() } else { ch });
}
}
}
PileupOp::Deletion { .. } => buf.push('*'),
PileupOp::ComplexIndel { insert_len, .. } => {
let _ = write!(buf, "*+{insert_len}");
}
PileupOp::RefSkip => buf.push(if is_reverse { '<' } else { '>' }),
}
}