use crate::core::{SeqReader, SeqRecord, LOOKUP_TABLES};
use anyhow::Result;
use clap::Args;
#[derive(Args, Debug)]
pub struct HetyArgs {
#[arg(value_name = "in.fa")]
pub input: String,
#[arg(short = 'w', value_name = "INT", default_value = "50000")]
pub win_size: usize,
#[arg(short = 't', value_name = "INT", default_value = "5")]
pub n_start: usize,
#[arg(short = 'm')]
pub lower_mask: bool,
}
pub fn run(args: &HetyArgs) -> Result<()> {
let mut reader = if args.input == "-" {
SeqReader::from_stdin()
} else {
SeqReader::from_path(&args.input)?
};
let win_step = args.win_size / args.n_start;
let mut buf = vec![0u8; args.win_size];
let mut record = SeqRecord::new(Vec::new(), Vec::new());
while reader.read_next(&mut record)? {
let mut cnt = [0u64; 3];
let mut next = 0usize;
for i in 0..=record.seq.len() {
if (i >= args.win_size && i % win_step == 0) || i == record.seq.len() {
if i == record.seq.len() && record.seq.len() >= args.win_size {
for y in (record.seq.len() - args.win_size)..next {
cnt[buf[y % args.win_size] as usize] -= 1;
}
}
if cnt[1] + cnt[2] > 0 {
println!(
"{}\t{}\t{}\t{:.2}\t{}\t{}",
String::from_utf8_lossy(&record.name),
next,
i,
(cnt[2] as f64) / ((cnt[1] + cnt[2]) as f64) * (args.win_size as f64),
cnt[1] + cnt[2],
cnt[2]
);
}
next = i;
}
if i < record.seq.len() {
let y = i % args.win_size;
let mut c = record.seq[i];
if args.lower_mask && c.is_ascii_lowercase() {
c = b'N';
}
let c = LOOKUP_TABLES.nt16[c as usize];
let x = LOOKUP_TABLES.bitcnt[c as usize];
if i >= args.win_size {
cnt[buf[y] as usize] -= 1;
}
buf[y] = if x == 2 {
2
} else if x == 1 {
1
} else {
0
};
cnt[buf[y] as usize] += 1;
}
}
}
Ok(())
}