use std::fs::File;
use std::io::{BufWriter, Write};
use std::num::NonZero;
use std::path::Path;
use rsomics_bamio::raw::{self, RawRecord};
use rsomics_common::{Result, RsomicsError};
const FLAG_UNMAPPED: u16 = 0x0004;
const FLAG_REVERSE: u16 = 0x0010;
const FLAG_QCFAIL: u16 = 0x0200;
const NIBBLE_A: u8 = 1;
const NIBBLE_C: u8 = 2;
const NIBBLE_G: u8 = 4;
const NIBBLE_T: u8 = 8;
const NIBBLE_N: u8 = 15;
fn complement_nibble(nib: u8) -> u8 {
match nib {
NIBBLE_A => NIBBLE_T,
NIBBLE_T => NIBBLE_A,
NIBBLE_C => NIBBLE_G,
NIBBLE_G => NIBBLE_C,
_ => NIBBLE_N,
}
}
pub struct NvcTable {
pub counts: Vec<[u64; 6]>,
pub read_length: usize,
}
impl NvcTable {
fn new(read_length: usize) -> Self {
Self {
counts: vec![[0u64; 6]; read_length],
read_length,
}
}
}
fn nibble_to_col(nib: u8) -> usize {
match nib {
NIBBLE_A => 0,
NIBBLE_C => 1,
NIBBLE_G => 2,
NIBBLE_T => 3,
NIBBLE_N => 4,
_ => 5,
}
}
fn accumulate(table: &mut NvcTable, rec: &RawRecord, is_reverse: bool) {
let n = rec.sequence_len().min(table.read_length);
if is_reverse {
for i in 0..n {
let src = (n - 1) - i;
let nib = complement_nibble(rec.seq_nibble(src));
table.counts[i][nibble_to_col(nib)] += 1;
}
} else {
for i in 0..n {
let nib = rec.seq_nibble(i);
table.counts[i][nibble_to_col(nib)] += 1;
}
}
}
pub fn compute_nvc(bam_path: &Path, mapq_cut: u8, workers: NonZero<usize>) -> Result<NvcTable> {
let mut reader = rsomics_bamio::open_with_workers(bam_path, workers)?;
reader.read_header().map_err(RsomicsError::Io)?;
let inner = reader.get_mut();
let mut rec = RawRecord::default();
let mut table: Option<NvcTable> = None;
loop {
let n = raw::read_record(inner, &mut rec)?;
if n == 0 {
break;
}
let flags = rec.flags();
if flags & (FLAG_UNMAPPED | FLAG_QCFAIL) != 0 {
continue;
}
if rec.mapping_quality() < mapq_cut {
continue;
}
let seq_len = rec.sequence_len();
if seq_len == 0 {
continue;
}
let t = table.get_or_insert_with(|| NvcTable::new(seq_len));
let is_reverse = flags & FLAG_REVERSE != 0;
accumulate(t, &rec, is_reverse);
}
Ok(table.unwrap_or_else(|| NvcTable::new(0)))
}
pub fn write_nvc_xls(table: &NvcTable, out_prefix: &Path) -> Result<()> {
let prefix_str = out_prefix
.file_name()
.unwrap_or_default()
.to_string_lossy()
.into_owned();
let dir = out_prefix.parent().unwrap_or(Path::new("."));
let xls_path = dir.join(format!("{prefix_str}.NVC.xls"));
let f = File::create(&xls_path).map_err(RsomicsError::Io)?;
let mut w = BufWriter::new(f);
writeln!(w, "Position\tA\tC\tG\tT\tN\tX").map_err(RsomicsError::Io)?;
for (pos, row) in table.counts.iter().enumerate() {
write!(w, "{pos}").map_err(RsomicsError::Io)?;
for &count in row {
write!(w, "\t {count}").map_err(RsomicsError::Io)?;
}
writeln!(w, "\t").map_err(RsomicsError::Io)?;
}
Ok(())
}
pub fn run_nvc(
bam_path: &Path,
out_prefix: &Path,
mapq_cut: u8,
workers: NonZero<usize>,
) -> Result<()> {
eprintln!("Read BAM file ...");
let table = compute_nvc(bam_path, mapq_cut, workers)?;
eprintln!(" Done");
eprintln!("generating data matrix ...");
write_nvc_xls(&table, out_prefix)?;
eprintln!("generating R script ...");
Ok(())
}