use crate::core::{SeqReader, SeqRecord, LOOKUP_TABLES};
use anyhow::Result;
use clap::Args;
#[derive(Args, Debug)]
pub struct KfreqArgs {
#[arg(value_name = "kmer")]
pub kmer: String,
#[arg(value_name = "in.fa")]
pub input: String,
}
pub fn run(args: &KfreqArgs) -> Result<()> {
let len = args.kmer.len();
let mut kmer = 0u64;
for &base in args.kmer.as_bytes() {
let c = LOOKUP_TABLES.nt6[base as usize];
if !(1..=4).contains(&c) {
anyhow::bail!("Invalid k-mer sequence");
}
kmer = (kmer << 2) | ((c - 1) as u64);
}
let mask = (1u64 << (2 * len)) - 1;
let mut neighbors = vec![false; 1 << (2 * len)];
for i in 0..len {
let x = kmer & !(3u64 << (2 * i));
for j in 0..4 {
neighbors[(x | (j << (2 * i))) as usize] = true;
}
}
let mut reader = if args.input == "-" {
SeqReader::from_stdin()
} else {
SeqReader::from_path(&args.input)?
};
let mut record = SeqRecord::new(Vec::new(), Vec::new());
while reader.read_next(&mut record)? {
let mut x = [0u64; 2];
let mut k = 0usize;
let mut cnt = [0i64; 2];
let mut cnt_nei = [0i64; 2];
for &base in &record.seq {
let c = LOOKUP_TABLES.nt6[base as usize];
if (1..=4).contains(&c) {
x[0] = ((x[0] << 2) | ((c - 1) as u64)) & mask;
x[1] = (x[1] >> 2) | (((4 - c) as u64) << (2 * (len - 1)));
if k < len {
k += 1;
}
if k == len {
if x[0] == kmer {
cnt[0] += 1;
} else if x[1] == kmer {
cnt[1] += 1;
}
if neighbors[x[0] as usize] {
cnt_nei[0] += 1;
} else if neighbors[x[1] as usize] {
cnt_nei[1] += 1;
}
}
} else {
k = 0;
}
}
let which = if cnt_nei[0] > cnt_nei[1] { 0 } else { 1 };
println!(
"{}\t{}\t{}\t{}\t{}",
String::from_utf8_lossy(&record.name),
record.seq.len(),
if which == 0 { '+' } else { '-' },
cnt_nei[which],
cnt[which]
);
}
Ok(())
}