use crate::core::{SeqReader, SeqRecord, LOOKUP_TABLES};
use anyhow::Result;
use clap::Args;
use rustc_hash::FxHashSet;
#[derive(Args, Debug)]
pub struct TeloArgs {
#[arg(value_name = "in.fq")]
pub input: String,
#[arg(short = 'm', value_name = "STR", default_value = "CCCTAA")]
pub motif: String,
#[arg(short = 'p', value_name = "INT", default_value = "1")]
pub penalty: i64,
#[arg(short = 'd', value_name = "INT", default_value = "2000")]
pub max_drop: i64,
#[arg(short = 's', value_name = "INT", default_value = "300")]
pub min_score: i64,
#[arg(short = 'P')]
pub show_profile: bool,
}
pub fn run(args: &TeloArgs) -> Result<()> {
let penalty = args.penalty.abs();
let len = args.motif.len();
let mask = (1u64 << (2 * len)) - 1;
let mut motif_set = FxHashSet::default();
for i in 0..len {
let mut x = 0u64;
for j in 0..len {
let c = LOOKUP_TABLES.nt6[args.motif.as_bytes()[(i + j) % len] as usize];
if !(1..=4).contains(&c) {
anyhow::bail!("Invalid motif sequence");
}
x = (x << 2) | ((c - 1) as u64);
}
motif_set.insert(x);
}
let mut reader = if args.input == "-" {
SeqReader::from_stdin()
} else {
SeqReader::from_path(&args.input)?
};
let mut record = SeqRecord::new(Vec::new(), Vec::new());
let mut sum_input = 0u64;
let mut sum_telo = 0u64;
while reader.read_next(&mut record)? {
let mut x = [0u64; 2];
let mut k = 0usize;
let mut score = 0i64;
let mut max_score = 0i64;
let mut max_i = -1isize;
let mut st = 0usize;
for (i, &base) in record.seq.iter().enumerate() {
let c = LOOKUP_TABLES.nt6[base as usize];
if (1..=4).contains(&c) {
x[0] = ((x[0] << 2) | ((c - 1) as u64)) & mask;
x[1] = (x[1] >> 2) | (((4 - c) as u64) << (2 * (len - 1)));
if k < len {
k += 1;
}
if k == len {
if motif_set.contains(&x[0]) || motif_set.contains(&x[1]) {
if score == 0 {
st = i + 1 - len;
}
score += 1;
if score > max_score {
max_score = score;
max_i = i as isize;
}
} else {
score -= penalty;
if score < 0 || max_score - score > args.max_drop {
if max_score >= args.min_score {
println!(
"{}\t{}\t{}\t{}\t{}",
String::from_utf8_lossy(&record.name),
record.seq.len(),
st,
max_i + 1,
max_score
);
sum_telo += max_i as u64 + 1 - st as u64;
}
score = 0;
max_score = 0;
}
}
}
} else {
k = 0;
if max_score >= args.min_score {
println!(
"{}\t{}\t{}\t{}\t{}",
String::from_utf8_lossy(&record.name),
record.seq.len(),
st,
max_i + 1,
max_score
);
sum_telo += max_i as u64 + 1 - st as u64;
}
score = 0;
max_score = 0;
}
}
if max_score >= args.min_score {
println!(
"{}\t{}\t{}\t{}\t{}",
String::from_utf8_lossy(&record.name),
record.seq.len(),
st,
max_i + 1,
max_score
);
sum_telo += max_i as u64 + 1 - st as u64;
}
sum_input += record.seq.len() as u64;
}
eprintln!(
"Total: {}\tTelomeric: {}\tFraction: {:.4}",
sum_input,
sum_telo,
sum_telo as f64 / sum_input as f64
);
Ok(())
}