use crate::core::{SeqReader, SeqRecord, SeqWriter, LOOKUP_TABLES};
use anyhow::Result;
use clap::Args;
#[derive(Args, Debug)]
pub struct CutnArgs {
#[arg(value_name = "in.fa")]
pub input: String,
#[arg(short = 'n', value_name = "INT", default_value = "1000")]
pub min_n_tract: usize,
#[arg(short = 'p', value_name = "INT", default_value = "10")]
pub non_n_penalty: i64,
#[arg(short = 'g')]
pub gap_only: bool,
}
fn find_next_cut(
seq: &[u8],
k: usize,
min_n_tract: usize,
non_n_penalty: i64,
) -> Option<(usize, usize, usize)> {
let mut pos = k;
while pos < seq.len() {
if LOOKUP_TABLES.nt16[seq[pos] as usize] == 15 {
let mut score = 0i64;
let mut max_score = -1i64;
let mut end = 0usize;
for (i, &base) in seq.iter().enumerate().skip(pos) {
if LOOKUP_TABLES.nt16[base as usize] == 15 {
score += 1;
} else {
score -= non_n_penalty;
}
if score < 0 {
break;
}
if score > max_score {
max_score = score;
end = i;
}
}
let mut score = 0i64;
let mut max_score = -1i64;
let mut begin = 0usize;
for i in (0..=end).rev() {
if LOOKUP_TABLES.nt16[seq[i] as usize] == 15 {
score += 1;
} else {
score -= non_n_penalty;
}
if score < 0 {
break;
}
if score > max_score {
max_score = score;
begin = i;
}
}
if end + 1 >= begin && end + 1 - begin >= min_n_tract {
return Some((begin, end + 1, end + 1));
}
pos = end + 1;
} else {
pos += 1;
}
}
None
}
pub fn run(args: &CutnArgs) -> Result<()> {
let mut reader = if args.input == "-" {
SeqReader::from_stdin()
} else {
SeqReader::from_path(&args.input)?
};
let mut writer = SeqWriter::to_stdout().with_line_width(60);
let mut record = SeqRecord::new(Vec::new(), Vec::new());
while reader.read_next(&mut record)? {
let mut k = 0;
while let Some((begin, end, next_k)) =
find_next_cut(&record.seq, k, args.min_n_tract, args.non_n_penalty)
{
if begin != 0 {
if args.gap_only {
println!(
"{}\t{}\t{}",
String::from_utf8_lossy(&record.name),
begin,
end
);
} else {
let mut sub_record = SeqRecord::new(
format!(
"{}:{}-{}",
String::from_utf8_lossy(&record.name),
k + 1,
begin
)
.into_bytes(),
record.seq[k..begin].to_vec(),
);
if let Some(qual) = &record.qual {
sub_record.qual = Some(qual[k..begin].to_vec());
}
writer.write_record(&sub_record)?;
}
}
k = next_k;
}
if !args.gap_only && k < record.seq.len() {
let mut sub_record = SeqRecord::new(
format!(
"{}:{}-{}",
String::from_utf8_lossy(&record.name),
k + 1,
record.seq.len()
)
.into_bytes(),
record.seq[k..].to_vec(),
);
if let Some(qual) = &record.qual {
sub_record.qual = Some(qual[k..].to_vec());
}
writer.write_record(&sub_record)?;
}
}
writer.flush()?;
Ok(())
}