use super::bed::*;
use rust_htslib::bam;
use rust_htslib::bam::Read;
use std::convert::TryFrom;
use std::fmt;
pub struct Nucfreq {
pub name: String,
pub pos: u32,
pub a: u64,
pub c: u64,
pub g: u64,
pub t: u64,
pub id: String,
}
impl fmt::Display for Nucfreq {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(
f,
"{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}",
self.name,
self.pos,
self.pos + 1,
self.a,
self.c,
self.g,
self.t,
self.id
)
}
}
pub struct NucList {
pub rgn: Region,
pub nucfreqs: Vec<Nucfreq>,
}
pub fn nucfreq(bam: &mut rust_htslib::bam::IndexedReader, rgn: &Region) -> Vec<Nucfreq> {
let rgn_length = usize::try_from(rgn.en - rgn.st + 1).unwrap();
let mut vec = Vec::with_capacity(rgn_length);
for p in bam.pileup() {
let pileup = p.unwrap();
if (pileup.pos() as u64) < rgn.st || (pileup.pos() as u64) >= rgn.en {
continue;
}
let mut freqs = Nucfreq {
name: rgn.name.clone(),
pos: pileup.pos(),
a: 0,
c: 0,
g: 0,
t: 0,
id: rgn.id.clone(),
};
for aln in pileup.alignments() {
if !aln.is_del() && !aln.is_refskip() {
let bp = aln.record().seq()[aln.qpos().unwrap()];
match bp {
b'A' => freqs.a += 1,
b'C' => freqs.c += 1,
b'G' => freqs.g += 1,
b'T' => freqs.t += 1,
b'N' => (),
_ => eprintln!("Seq character not recognized (u8):{:?}", bp),
}
}
}
vec.push(freqs);
}
vec
}
pub fn region_nucfreq(bam_f: &str, rgn: &Region, threads: usize) -> Vec<Nucfreq> {
eprint!("\rFinding nucfreq in: {}\t{}\t{}", rgn.name, rgn.st, rgn.en);
let mut bam =
bam::IndexedReader::from_path(bam_f).unwrap_or_else(|_| panic!("Failed to open {}", bam_f));
bam.set_threads(threads)
.expect("Failed to enable multithreaded bam reading.");
bam.fetch((&rgn.name, rgn.st as i64, rgn.en as i64))
.unwrap_or_else(|_| panic!("Is this region ({}) in your reference/bam?", rgn));
nucfreq(&mut bam, rgn)
}
pub fn print_nucfreq_header() {
print!("#chr\tstart\tend\t");
print!("A\tC\tG\tT\t");
println!("region_id");
}
pub fn print_nucfreq(vec: &[Nucfreq]) {
for nf in vec {
println!("{}", nf);
}
}
pub fn small_nucfreq(vec: &[Nucfreq]) {
let mut cur_name = "".to_string();
let mut cur_id = "".to_string();
for nf in vec {
if nf.name != cur_name || nf.id != cur_id {
cur_name = nf.name.clone();
cur_id = nf.id.clone();
println!("#{}\t{}\t{}", nf.name, nf.pos, nf.id);
}
let mut mc = [nf.a, nf.c, nf.g, nf.t];
mc.sort_unstable();
println!("{}\t{}", mc[3], mc[2]);
}
}